<?xml version="1.0" encoding="utf-8" ?><gmi:MI_Metadata
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:gco="http://www.isotc211.org/2005/gco"
  xmlns:gfc="http://www.isotc211.org/2005/gfc"
  xmlns:gmd="http://www.isotc211.org/2005/gmd"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:srv="http://www.isotc211.org/2005/srv"
  xmlns:gml="http://www.opengis.net/gml/3.2"
  xmlns:gsr="http://www.isotc211.org/2005/gsr"
  xmlns:gss="http://www.isotc211.org/2005/gss"
  xmlns:gts="http://www.isotc211.org/2005/gts"
  xmlns:gmi="http://www.isotc211.org/2005/gmi"
  xmlns:gmx="http://www.isotc211.org/2005/gmx"
  xsi:schemaLocation="http://www.isotc211.org/2005/gmi https://data.noaa.gov/resources/iso19139/schema.xsd">
  <!-- Identifier of this metadata record: the lod.bco-dmo.org URI of dataset 933610. -->
  <gmd:fileIdentifier>
    <gco:CharacterString>http://lod.bco-dmo.org/id/dataset/933610</gco:CharacterString>
  </gmd:fileIdentifier>
  <!-- Language of the metadata text, given as free text rather than a coded value. -->
  <gmd:language>
    <gco:CharacterString>eng; USA</gco:CharacterString>
  </gmd:language>
  <!-- Character set of the record; agrees with the encoding named in the XML declaration. -->
  <gmd:characterSet>
    <gmd:MD_CharacterSetCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode" codeListValue="utf8">utf8</gmd:MD_CharacterSetCode>
  </gmd:characterSet>
  <!-- Scope of the described resource: a dataset. -->
  <gmd:hierarchyLevel>
    <gmd:MD_ScopeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="dataset" codeSpace="005">dataset</gmd:MD_ScopeCode>
  </gmd:hierarchyLevel>
  <!-- Free-text description of what the scope level above means for this record. -->
  <gmd:hierarchyLevelName>
    <gco:CharacterString>Highest level of data collection, from a common set of sensors or instrumentation, usually within the same research project</gco:CharacterString>
  </gmd:hierarchyLevelName>
  <gmd:contact>
    <!-- Contact for this metadata record: the BCO-DMO office, in the pointOfContact role. -->
    <gmd:CI_ResponsibleParty>
      <gmd:organisationName>
        <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/affiliation/191.rdf" xlink:actuate="onRequest">Biological and Chemical Oceanography Data Management Office (BCO-DMO)</gmx:Anchor>
      </gmd:organisationName>
      <gmd:contactInfo>
        <gmd:CI_Contact>
          <gmd:phone>
            <gmd:CI_Telephone>
              <gmd:voice>
                <gco:CharacterString>Unavailable</gco:CharacterString>
              </gmd:voice>
              <gmd:facsimile>
                <gco:CharacterString>508-289-2009</gco:CharacterString>
              </gmd:facsimile>
            </gmd:CI_Telephone>
          </gmd:phone>
          <gmd:address>
            <gmd:CI_Address>
              <gmd:deliveryPoint>
                <gco:CharacterString>WHOI MS#36</gco:CharacterString>
              </gmd:deliveryPoint>
              <gmd:city>
                <gco:CharacterString>Woods Hole</gco:CharacterString>
              </gmd:city>
              <gmd:administrativeArea>
                <gco:CharacterString>MA</gco:CharacterString>
              </gmd:administrativeArea>
              <gmd:postalCode>
                <gco:CharacterString>02543</gco:CharacterString>
              </gmd:postalCode>
              <gmd:country>
                <gco:CharacterString>USA</gco:CharacterString>
              </gmd:country>
              <gmd:electronicMailAddress>
                <gco:CharacterString>info@bco-dmo.org</gco:CharacterString>
              </gmd:electronicMailAddress>
            </gmd:CI_Address>
          </gmd:address>
          <gmd:onlineResource>
            <gmd:CI_OnlineResource>
              <gmd:linkage>
                <gmd:URL>http://www.bco-dmo.org</gmd:URL>
              </gmd:linkage>
            </gmd:CI_OnlineResource>
          </gmd:onlineResource>
          <gmd:hoursOfService>
            <gco:CharacterString>Monday - Friday 8:00am - 5:00pm</gco:CharacterString>
          </gmd:hoursOfService>
          <gmd:contactInstructions>
            <gco:CharacterString>For questions regarding this resource, please contact BCO-DMO via the email address provided.</gco:CharacterString>
          </gmd:contactInstructions>
        </gmd:CI_Contact>
      </gmd:contactInfo>
      <gmd:role>
        <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact" codeSpace="007">pointOfContact</gmd:CI_RoleCode>
      </gmd:role>
    </gmd:CI_ResponsibleParty>
  </gmd:contact>
  <!-- Date stamp of the metadata record itself; the resource's own dates are in the citation. -->
  <gmd:dateStamp>
    <gco:Date>2024-07-22</gco:Date>
  </gmd:dateStamp>
  <!-- Name and version of the metadata standard this record declares. -->
  <gmd:metadataStandardName>
    <gco:CharacterString>ISO 19115-2 Geographic Information - Metadata - Part 2: Extensions for Imagery and Gridded Data</gco:CharacterString>
  </gmd:metadataStandardName>
  <gmd:metadataStandardVersion>
    <gco:CharacterString>ISO 19115-2:2009(E)</gco:CharacterString>
  </gmd:metadataStandardVersion>
  <!-- Reference system is not described inline; it is included by xlink reference to an external component. -->
  <gmd:referenceSystemInfo xlink:href="https://data.noaa.gov/docucomp/895cc120-95ed-11e0-aa80-0800200c9a66" xlink:title="WGS 84 / World Mercator"/>
  <gmd:identificationInfo>
    <gmd:MD_DataIdentification>
      <gmd:citation>
        <!-- Citation of the dataset: title, publication and revision dates, DOI identifier,
             the two principal investigators, and BCO-DMO as publisher. -->
        <gmd:CI_Citation>
          <gmd:title>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset/933610.rdf" xlink:actuate="onRequest">Collection of subsurface bacteria Nitrospirota and Nitrospinota genome data including IMG and NCBI accessions for sequence datasets in June 2021 (Slow Life in Crust project)</gmx:Anchor>
          </gmd:title>
          <gmd:date>
            <gmd:CI_Date>
              <gmd:date>
                <gco:Date>2024-12-29</gco:Date>
              </gmd:date>
              <gmd:dateType>
                <gmd:CI_DateTypeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="publication" codeSpace="ISOTC211/19115">publication</gmd:CI_DateTypeCode>
              </gmd:dateType>
            </gmd:CI_Date>
          </gmd:date>
          <gmd:date>
            <gmd:CI_Date>
              <gmd:date>
                <gco:Date>2024-12-29</gco:Date>
              </gmd:date>
              <gmd:dateType>
                <gmd:CI_DateTypeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="revision" codeSpace="ISOTC211/19115">revision</gmd:CI_DateTypeCode>
              </gmd:dateType>
            </gmd:CI_Date>
          </gmd:date>
          <!-- Edition carries the dataset version that otherCitationDetails below cites as "Version 1"
               and that the DOI suffix (.1) reflects; it was previously an empty string. -->
          <gmd:edition>
            <gco:CharacterString>1</gco:CharacterString>
          </gmd:edition>
          <gmd:identifier>
            <gmd:MD_Identifier>
              <gmd:authority>
                <gmd:CI_Citation>
                  <gmd:title>
                    <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/affiliation/272.rdf" xlink:actuate="onRequest">Marine Biological Laboratory/Woods Hole Oceanographic Institution Library (MBLWHOI DLA)</gmx:Anchor>
                  </gmd:title>
                  <gmd:date>
                    <gmd:CI_Date>
                      <gmd:date>
                        <gco:Date>2025-02-04</gco:Date>
                      </gmd:date>
                      <gmd:dateType>
                        <gmd:CI_DateTypeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="publication" codeSpace="002">publication</gmd:CI_DateTypeCode>
                      </gmd:dateType>
                    </gmd:CI_Date>
                  </gmd:date>
                </gmd:CI_Citation>
              </gmd:authority>
              <gmd:code>
                <gmx:Anchor xlink:href="https://doi.org/10.26008/1912/bco-dmo.933610.1" xlink:title="DOI" xlink:actuate="onRequest">https://doi.org/10.26008/1912/bco-dmo.933610.1</gmx:Anchor>
              </gmd:code>
            </gmd:MD_Identifier>
          </gmd:identifier>
          <gmd:citedResponsibleParty>
            <gmd:CI_ResponsibleParty>
              <gmd:individualName>
                <gmx:Anchor xlink:href="http://orcid.org/0000-0002-6233-3578" xlink:title="ORCID" xlink:actuate="onRequest">Beth N. Orcutt</gmx:Anchor>
              </gmd:individualName>
              <gmd:organisationName>
                <gmx:Anchor xlink:href="https://ror.org/03v2r6x37" xlink:title="ROR ID" xlink:actuate="onRequest">Bigelow Laboratory for Ocean Sciences</gmx:Anchor>
              </gmd:organisationName>
              <!-- No position title on record: flagged as missing instead of an empty string. -->
              <gmd:positionName gco:nilReason="missing"/>
              <gmd:role>
                <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="principalInvestigator" codeSpace="008">principalInvestigator</gmd:CI_RoleCode>
              </gmd:role>
            </gmd:CI_ResponsibleParty>
          </gmd:citedResponsibleParty>
          <gmd:citedResponsibleParty>
            <gmd:CI_ResponsibleParty>
              <gmd:individualName>
                <gmx:Anchor xlink:href="http://orcid.org/0000-0002-6456-3270" xlink:title="ORCID" xlink:actuate="onRequest">Timothy D'Angelo</gmx:Anchor>
              </gmd:individualName>
              <gmd:organisationName>
                <gmx:Anchor xlink:href="https://ror.org/03v2r6x37" xlink:title="ROR ID" xlink:actuate="onRequest">Bigelow Laboratory for Ocean Sciences</gmx:Anchor>
              </gmd:organisationName>
              <!-- No position title on record: flagged as missing instead of an empty string. -->
              <gmd:positionName gco:nilReason="missing"/>
              <gmd:role>
                <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="principalInvestigator" codeSpace="008">principalInvestigator</gmd:CI_RoleCode>
              </gmd:role>
            </gmd:CI_ResponsibleParty>
          </gmd:citedResponsibleParty>
          <gmd:citedResponsibleParty>
            <gmd:CI_ResponsibleParty>
              <gmd:organisationName>
                <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/affiliation/191.rdf" xlink:actuate="onRequest">Biological and Chemical Oceanography Data Management Office (BCO-DMO)</gmx:Anchor>
              </gmd:organisationName>
              <gmd:contactInfo>
                <gmd:CI_Contact>
                  <gmd:phone>
                    <gmd:CI_Telephone>
                      <gmd:voice>
                        <gco:CharacterString>Unavailable</gco:CharacterString>
                      </gmd:voice>
                      <gmd:facsimile>
                        <gco:CharacterString>508-289-2009</gco:CharacterString>
                      </gmd:facsimile>
                    </gmd:CI_Telephone>
                  </gmd:phone>
                  <gmd:address>
                    <gmd:CI_Address>
                      <gmd:deliveryPoint>
                        <gco:CharacterString>WHOI MS#36</gco:CharacterString>
                      </gmd:deliveryPoint>
                      <gmd:city>
                        <gco:CharacterString>Woods Hole</gco:CharacterString>
                      </gmd:city>
                      <gmd:administrativeArea>
                        <gco:CharacterString>MA</gco:CharacterString>
                      </gmd:administrativeArea>
                      <gmd:postalCode>
                        <gco:CharacterString>02543</gco:CharacterString>
                      </gmd:postalCode>
                      <gmd:country>
                        <gco:CharacterString>USA</gco:CharacterString>
                      </gmd:country>
                      <gmd:electronicMailAddress>
                        <gco:CharacterString>info@bco-dmo.org</gco:CharacterString>
                      </gmd:electronicMailAddress>
                    </gmd:CI_Address>
                  </gmd:address>
                  <gmd:onlineResource>
                    <gmd:CI_OnlineResource>
                      <gmd:linkage>
                        <gmd:URL>http://www.bco-dmo.org</gmd:URL>
                      </gmd:linkage>
                    </gmd:CI_OnlineResource>
                  </gmd:onlineResource>
                  <gmd:hoursOfService>
                    <gco:CharacterString>Monday - Friday 8:00am - 5:00pm</gco:CharacterString>
                  </gmd:hoursOfService>
                  <gmd:contactInstructions>
                    <gco:CharacterString>For questions regarding this resource, please contact BCO-DMO via the email address provided.</gco:CharacterString>
                  </gmd:contactInstructions>
                </gmd:CI_Contact>
              </gmd:contactInfo>
              <gmd:role>
                <!-- codeSpace added so this role code carries the same numeric domain code as the other role codes in the record. -->
                <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="publisher" codeSpace="010">publisher</gmd:CI_RoleCode>
              </gmd:role>
            </gmd:CI_ResponsibleParty>
          </gmd:citedResponsibleParty>
          <gmd:otherCitationDetails>
            <gco:CharacterString>Cite this dataset as: D'Angelo, T., Orcutt, B. N. (2025) Collection of subsurface bacteria Nitrospirota and Nitrospinota genome data including IMG and NCBI accessions for sequence datasets in June 2021 (Slow Life in Crust project). Biological and Chemical Oceanography Data Management Office (BCO-DMO). (Version 1) Version Date 2024-12-29 [if applicable, indicate subset used]. doi:10.26008/1912/bco-dmo.933610.1 [access date]</gco:CharacterString>
          </gmd:otherCitationDetails>
        </gmd:CI_Citation>
      </gmd:citation>
      <!-- Abstract held as plain text: the HTML markup that had been entity-escaped twice is removed
           so consumers no longer display raw tag text. -->
      <gmd:abstract>
        <gco:CharacterString>Methods and Sampling: Genomic dataset collection, curation and quality control: This study used publicly available genome assemblies. Existing publicly available genome assemblies were downloaded from the National Center for Biotechnology Information (NCBI) and the Integrated Microbial Genomes (IMG) database of the U.S. Department of Energy’s Joint Genome Institute in June 2021. The Genome Taxonomy Database (GTDB) website (release 202) was used to access lists of NCBI assembly accession numbers for the following GTDB-assigned phyla: Nitrospinota, Nitrospinota_A (now called Tectomicrobia), Nitrospinota_B, Nitrospirota, Nitrospirota_A (Leptospirilla). The IMG assemblies were found using the same GTDB taxonomy classifier using the search function on the IMG website. IMG metagenome assemblies that were designated as “public” and “published” were also downloaded for these phyla. Duplicate entries between IMG and NCBI were manually removed.</gco:CharacterString>
      </gmd:abstract>
      <!-- Funding acknowledgement, linked to the award record. -->
      <gmd:credit>
        <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/award/700323.rdf"
                    xlink:title="OCE-1737017"
                    xlink:actuate="onRequest">Funding provided by NSF Division of Ocean Sciences (NSF OCE) Award Number: OCE-1737017 Award URL: https://www.nsf.gov/awardsearch/show-award?AWD_ID=1737017</gmx:Anchor>
      </gmd:credit>
      <!-- Progress status of the dataset. -->
      <gmd:status>
        <gmd:MD_ProgressCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ProgressCode"
                             codeListValue="completed">completed</gmd:MD_ProgressCode>
      </gmd:status>
      <gmd:pointOfContact>
        <!-- Dataset point of contact: Beth N. Orcutt, with phone, postal and e-mail details. -->
        <gmd:CI_ResponsibleParty>
          <gmd:individualName>
            <gmx:Anchor xlink:href="http://orcid.org/0000-0002-6233-3578" xlink:title="ORCID" xlink:actuate="onRequest">Beth N. Orcutt</gmx:Anchor>
          </gmd:individualName>
          <gmd:organisationName>
            <gmx:Anchor xlink:href="https://ror.org/03v2r6x37" xlink:title="ROR ID" xlink:actuate="onRequest">Bigelow Laboratory for Ocean Sciences</gmx:Anchor>
          </gmd:organisationName>
          <gmd:contactInfo>
            <gmd:CI_Contact>
              <gmd:phone>
                <gmd:CI_Telephone>
                  <gmd:voice>
                    <gco:CharacterString>207-315-2567 ext 312</gco:CharacterString>
                  </gmd:voice>
                </gmd:CI_Telephone>
              </gmd:phone>
              <gmd:address>
                <gmd:CI_Address>
                  <gmd:deliveryPoint>
                    <gco:CharacterString>60 Bigelow Drive PO Box 380</gco:CharacterString>
                  </gmd:deliveryPoint>
                  <gmd:city>
                    <gco:CharacterString>East Boothbay</gco:CharacterString>
                  </gmd:city>
                  <gmd:administrativeArea>
                    <gco:CharacterString>Maine</gco:CharacterString>
                  </gmd:administrativeArea>
                  <gmd:postalCode>
                    <gco:CharacterString>04544</gco:CharacterString>
                  </gmd:postalCode>
                  <gmd:country>
                    <gco:CharacterString>USA</gco:CharacterString>
                  </gmd:country>
                  <gmd:electronicMailAddress>
                    <gco:CharacterString>borcutt@bigelow.org</gco:CharacterString>
                  </gmd:electronicMailAddress>
                </gmd:CI_Address>
              </gmd:address>
            </gmd:CI_Contact>
          </gmd:contactInfo>
          <gmd:role>
            <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact" codeSpace="007">pointOfContact</gmd:CI_RoleCode>
          </gmd:role>
        </gmd:CI_ResponsibleParty>
      </gmd:pointOfContact>
      <gmd:pointOfContact>
        <!-- Dataset point of contact: Timothy D'Angelo, with phone, postal and e-mail details. -->
        <gmd:CI_ResponsibleParty>
          <gmd:individualName>
            <gmx:Anchor xlink:href="http://orcid.org/0000-0002-6456-3270" xlink:title="ORCID" xlink:actuate="onRequest">Timothy D'Angelo</gmx:Anchor>
          </gmd:individualName>
          <gmd:organisationName>
            <gmx:Anchor xlink:href="https://ror.org/03v2r6x37" xlink:title="ROR ID" xlink:actuate="onRequest">Bigelow Laboratory for Ocean Sciences</gmx:Anchor>
          </gmd:organisationName>
          <gmd:contactInfo>
            <gmd:CI_Contact>
              <gmd:phone>
                <gmd:CI_Telephone>
                  <gmd:voice>
                    <gco:CharacterString>207-315-2567</gco:CharacterString>
                  </gmd:voice>
                </gmd:CI_Telephone>
              </gmd:phone>
              <gmd:address>
                <gmd:CI_Address>
                  <gmd:deliveryPoint>
                    <gco:CharacterString>60 Bigelow Drive</gco:CharacterString>
                  </gmd:deliveryPoint>
                  <gmd:city>
                    <gco:CharacterString>East Boothbay</gco:CharacterString>
                  </gmd:city>
                  <gmd:administrativeArea>
                    <gco:CharacterString>ME</gco:CharacterString>
                  </gmd:administrativeArea>
                  <gmd:postalCode>
                    <gco:CharacterString>04544</gco:CharacterString>
                  </gmd:postalCode>
                  <gmd:country>
                    <gco:CharacterString>USA</gco:CharacterString>
                  </gmd:country>
                  <gmd:electronicMailAddress>
                    <gco:CharacterString>tdangelo@bigelow.org</gco:CharacterString>
                  </gmd:electronicMailAddress>
                </gmd:CI_Address>
              </gmd:address>
            </gmd:CI_Contact>
          </gmd:contactInfo>
          <gmd:role>
            <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact" codeSpace="007">pointOfContact</gmd:CI_RoleCode>
          </gmd:role>
        </gmd:CI_ResponsibleParty>
      </gmd:pointOfContact>
            <gmd:resourceMaintenance>
        <!-- Update policy: revised as needed; the note records the dataset version. -->
        <gmd:MD_MaintenanceInformation>
          <gmd:maintenanceAndUpdateFrequency>
            <gmd:MD_MaintenanceFrequencyCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_MaintenanceFrequencyCode"
                                             codeListValue="asNeeded"
                                             codeSpace="009">asNeeded</gmd:MD_MaintenanceFrequencyCode>
          </gmd:maintenanceAndUpdateFrequency>
          <gmd:maintenanceNote>
            <gco:CharacterString>Dataset Version: 1</gco:CharacterString>
          </gmd:maintenanceNote>
        </gmd:MD_MaintenanceInformation>
      </gmd:resourceMaintenance>
      <gmd:resourceFormat>
        <!-- Resource format: name recorded as "Unknown", version marked nil with reason unknown. -->
        <gmd:MD_Format>
          <gmd:name>
            <gco:CharacterString>Unknown</gco:CharacterString>
          </gmd:name>
          <gmd:version gco:nilReason="unknown"/>
        </gmd:MD_Format>
      </gmd:resourceFormat>
            <gmd:descriptiveKeywords>
        <!-- Theme keywords from a user-defined vocabulary. Each Anchor links to a dataset-parameter
             record and carries a parameter URI in xlink:title. -->
        <gmd:MD_Keywords>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947368.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/960.rdf" xlink:actuate="onRequest">ID</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947369.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1971.rdf" xlink:actuate="onRequest">IMG_genome_id</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947370.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/873475.rdf" xlink:actuate="onRequest">GenBank_assembly</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947371.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/854959.rdf" xlink:actuate="onRequest">Sample</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947372.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/854959.rdf" xlink:actuate="onRequest">Corrected_BioSample</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947373.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/854958.rdf" xlink:actuate="onRequest">BioProject</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947374.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1070.rdf" xlink:actuate="onRequest">release_date</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947375.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1908.rdf" xlink:actuate="onRequest">last_updated_date</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947376.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1908.rdf" xlink:actuate="onRequest">publication_date</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947377.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/994.rdf" xlink:actuate="onRequest">Domain</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947378.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/928.rdf" xlink:actuate="onRequest">Phylum</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947379.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/2008.rdf" xlink:actuate="onRequest">Class</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947380.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/912.rdf" xlink:actuate="onRequest">Order</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947381.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/830.rdf" xlink:actuate="onRequest">Family</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947382.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/2009.rdf" xlink:actuate="onRequest">Genus</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947383.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/976.rdf" xlink:actuate="onRequest">Species</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947384.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/995.rdf" xlink:actuate="onRequest">NCBI_organism_taxid</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947385.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1740.rdf" xlink:actuate="onRequest">Isolation_Source</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947386.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">IsolationPlot</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947387.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1740.rdf" xlink:actuate="onRequest">Location</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947388.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Coordinates</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947389.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/730.rdf" xlink:actuate="onRequest">Latitude</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947390.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/731.rdf" xlink:actuate="onRequest">Longitude</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947391.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Completeness</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947392.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Contamination</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947393.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Genome_Size_bp</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947394.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Estimated_Genome_Length</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947395.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">num_predicted_genes</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947396.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">GC</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/dataset-parameter/947397.rdf" xlink:title="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:actuate="onRequest">Coding_density</gmx:Anchor>
          </gmd:keyword>
          <gmd:type>
            <gmd:MD_KeywordTypeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="theme" codeSpace="005">theme</gmd:MD_KeywordTypeCode>
          </gmd:type>
          <gmd:thesaurusName>
            <gmd:CI_Citation>
              <gmd:title>
                <gco:CharacterString>None, User defined</gco:CharacterString>
              </gmd:title>
              <gmd:date gco:nilReason="unknown"/>
            </gmd:CI_Citation>
          </gmd:thesaurusName>
        </gmd:MD_Keywords>
      </gmd:descriptiveKeywords>
            <gmd:descriptiveKeywords>
        <!-- Keywords drawn from the "BCO-DMO Standard Parameters" thesaurus; each Anchor links to
             the parameter record. -->
        <gmd:MD_Keywords>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/960.rdf" xlink:title="Parameter" xlink:actuate="onRequest">sample identification</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/1971.rdf" xlink:title="Parameter" xlink:actuate="onRequest">accession number</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/873475.rdf" xlink:title="Parameter" xlink:actuate="onRequest">NCBI Genbank nucleotide accession</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/854959.rdf" xlink:title="Parameter" xlink:actuate="onRequest">NCBI BioSample accession</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/854958.rdf" xlink:title="Parameter" xlink:actuate="onRequest">NCBI BioProject accession</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/1070.rdf" xlink:title="Parameter" xlink:actuate="onRequest">date</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/1908.rdf" xlink:title="Parameter" xlink:actuate="onRequest">ISO_DateTime_UTC</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/994.rdf" xlink:title="Parameter" xlink:actuate="onRequest">taxon</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/928.rdf" xlink:title="Parameter" xlink:actuate="onRequest">phylum</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/2008.rdf" xlink:title="Parameter" xlink:actuate="onRequest">taxonomic class</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/912.rdf" xlink:title="Parameter" xlink:actuate="onRequest">order</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/830.rdf" xlink:title="Parameter" xlink:actuate="onRequest">family</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/2009.rdf" xlink:title="Parameter" xlink:actuate="onRequest">genus</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/976.rdf" xlink:title="Parameter" xlink:actuate="onRequest">species scientific name (binomial)</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/995.rdf" xlink:title="Parameter" xlink:actuate="onRequest">taxon_code</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/1740.rdf" xlink:title="Parameter" xlink:actuate="onRequest">site description</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/1073.rdf" xlink:title="Parameter" xlink:actuate="onRequest">No BCO-DMO term</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/730.rdf" xlink:title="Parameter" xlink:actuate="onRequest">latitude</gmx:Anchor>
          </gmd:keyword>
          <gmd:keyword>
            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/parameter/731.rdf" xlink:title="Parameter" xlink:actuate="onRequest">longitude</gmx:Anchor>
          </gmd:keyword>
          <gmd:type>
            <gmd:MD_KeywordTypeCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="featureType">featureType</gmd:MD_KeywordTypeCode>
          </gmd:type>
          <gmd:thesaurusName>
            <gmd:CI_Citation>
              <gmd:title>
                <gco:CharacterString>BCO-DMO Standard Parameters</gco:CharacterString>
              </gmd:title>
              <gmd:date gco:nilReason="unknown"/>
            </gmd:CI_Citation>
          </gmd:thesaurusName>
        </gmd:MD_Keywords>
      </gmd:descriptiveKeywords>
      <gmd:resourceConstraints>
        <gmd:MD_LegalConstraints>
          <gmd:accessConstraints>
            <gmd:MD_RestrictionCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_RestrictionCode" codeListValue="otherRestrictions" codeSpace="008">otherRestrictions</gmd:MD_RestrictionCode>
          </gmd:accessConstraints>
          <gmd:useConstraints>
            <gmd:MD_RestrictionCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_RestrictionCode" codeListValue="otherRestrictions" codeSpace="008">otherRestrictions</gmd:MD_RestrictionCode>
          </gmd:useConstraints>
          <gmd:otherConstraints>
            <gco:CharacterString>Access Constraints: none. Use Constraints: Please follow guidelines at: http://www.bco-dmo.org/terms-use Distribution liability: Under no circumstances shall BCO-DMO be liable for any direct, incidental, special, consequential, indirect, or punitive damages that result from the use of, or the inability to use, the materials in this data submission. If you are dissatisfied with any materials in this data submission your sole and exclusive remedy is to discontinue use.</gco:CharacterString>
          </gmd:otherConstraints>
        </gmd:MD_LegalConstraints>
              </gmd:resourceConstraints>
      <gmd:aggregationInfo>
        <gmd:MD_AggregateInformation>
          <gmd:aggregateDataSetName>
              <gmd:CI_Citation>
                <gmd:title>
                  <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/project/700324.rdf" xlink:title="Project" xlink:actuate="onRequest">Microbial activity in the crustal deep biosphere</gmx:Anchor>
                </gmd:title>
                <gmd:date gco:nilReason="inapplicable"/>
                <gmd:citedResponsibleParty>
                  <gmd:CI_ResponsibleParty>
                    <gmd:contactInfo>
                      <gmd:CI_Contact>
                        <gmd:onlineResource>
                          <gmd:CI_OnlineResource>
                          <gmd:linkage>
                            <gmd:URL>https://osprey.bco-dmo.org/project/700324</gmd:URL>
                          </gmd:linkage>
                          <gmd:name>
                            <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/project/700324.rdf" xlink:title="Project Name" xlink:actuate="onRequest">Microbial activity in the crustal deep biosphere</gmx:Anchor>
                          </gmd:name>
                          <gmd:description>
                            <gco:CharacterString>&lt;p&gt;&lt;em&gt;NSF Award Abstract:&lt;/em&gt;&lt;br /&gt;
The marine deep biosphere is the habitat for life existing under the sea floor. The zone has remarkably low energy sources creating a paradox of how life can persist there. Resolving this energy paradox is a grand challenge in deep biosphere research. The Juan de Fuca Ridge flank off the coast of Washington, USA, is an accessible, low energy environment making it an attractive location for addressing this challenge. A series of experiments will be conducted on the seafloor at the Juan de Fuca Ridge flank, using established subseafloor observatories that access the crustal deep biosphere, to provide the first direct in situ measurement of microbial activity in the crustal subsurface. This project will provide essential information about the ability of life to survive under conditions that we are not able to replicate in the laboratory, but that are increasingly important for understanding microbial community interaction in the environment. This information can then be used in models of global microbial activity for estimating the impact of this biosphere on elemental cycling, transforming our understanding of microbial processes within this vast subseafloor habitat. To communicate these discoveries to the public, the project will include a ship-to-shore outreach program during the cruise. In addition public lectures will be presented, and an interactive display of deep-sea video footage will be set up for the annual public Open House at the Bigelow Laboratory for Ocean Sciences in Maine. Diverse undergraduate students and a postdoctoral researcher will be recruited to participate in the research and public outreach activities.&lt;/p&gt;
&lt;p&gt;This project proposes to leverage existing subsurface infrastructure on the eastern flank of the Juan de Fuca Ridge with advances in single-cell based molecular and geochemical approaches to make fundamental new discoveries about the activity of life in the deep crustal biosphere. During a two-week research cruise, the research team will incubate crustal fluids in situ and in the laboratory with labeled substrates for tracking single-cell activity, coupled with radioisotope tracer activity and potentiostat measurements, with the objective of determining in situ and potential rates of activity and cellular physiology. The research will also identify which metabolisms active microorganisms utilize under in situ and laboratory conditions, the rates of these processes, and the microorganisms involved. The results are expected to provide explicit hypothesis testing of microbial activity and in situ microbial growth rates from the crustal deep biosphere to transform understanding of microbial activity in the crustal deep biosphere and generate critical information about the ability of life to survive under low energy conditions.&lt;/p&gt;</gco:CharacterString>
                          </gmd:description>
                          <gmd:function>
                            <gmd:CI_OnLineFunctionCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_OnLineFunctionCode" codeListValue="information"/>
                          </gmd:function>
                      </gmd:CI_OnlineResource>
                    </gmd:onlineResource>
                  </gmd:CI_Contact>
                </gmd:contactInfo>
                <gmd:role gco:nilReason="missing"/>
              </gmd:CI_ResponsibleParty>
            </gmd:citedResponsibleParty>
          </gmd:CI_Citation>
        </gmd:aggregateDataSetName>
          <gmd:aggregateDataSetIdentifier>
            <gmd:MD_Identifier>
              <gmd:code>
                <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/project/700324.rdf" xlink:title="Project Acronym" xlink:actuate="onRequest">Slow Life in Crust</gmx:Anchor>
              </gmd:code>
            </gmd:MD_Identifier>
          </gmd:aggregateDataSetIdentifier>
          <gmd:associationType>
            <gmd:DS_AssociationTypeCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#DS_AssociationTypeCode" codeListValue="largerWorkCitation">largerWorkCitation</gmd:DS_AssociationTypeCode>
          </gmd:associationType>
          <gmd:initiativeType>
            <gmd:DS_InitiativeTypeCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#DS_InitiativeTypeCode" codeListValue="project">project</gmd:DS_InitiativeTypeCode>
          </gmd:initiativeType>
        </gmd:MD_AggregateInformation>
      </gmd:aggregationInfo>
      <gmd:language>
        <gco:CharacterString>eng; USA</gco:CharacterString>
      </gmd:language>
      <gmd:topicCategory>
        <gmd:MD_TopicCategoryCode>biota</gmd:MD_TopicCategoryCode>
      </gmd:topicCategory>
       <gmd:extent>
        <gmd:EX_Extent id="boundingExtent"><gmd:geographicElement>
            <gmd:EX_GeographicBoundingBox id="boundingGeographicBoundingBox">
              <gmd:westBoundLongitude>
                <gco:Decimal>-176.57</gco:Decimal>
              </gmd:westBoundLongitude>
              <gmd:eastBoundLongitude>
                <gco:Decimal>179.06</gco:Decimal>
              </gmd:eastBoundLongitude>
              <gmd:southBoundLatitude>
                <gco:Decimal>-77.01</gco:Decimal>
              </gmd:southBoundLatitude>
              <gmd:northBoundLatitude>
                <gco:Decimal>81.82</gco:Decimal>
              </gmd:northBoundLatitude>
            </gmd:EX_GeographicBoundingBox>
          </gmd:geographicElement>
        <gmd:temporalElement>
            <gmd:EX_TemporalExtent>
              <gmd:extent>
                <gml:TimePeriod gml:id="boundingTemporalExtent">
                  <gml:beginPosition>2013-04-26</gml:beginPosition>
                  <gml:endPosition>2023-01-12</gml:endPosition>
                </gml:TimePeriod>
              </gmd:extent>
            </gmd:EX_TemporalExtent>
          </gmd:temporalElement>
          </gmd:EX_Extent>
      </gmd:extent>
      <gmd:extent>
          <gmd:EX_Extent>
            <gmd:geographicElement>
              <gmd:EX_GeographicDescription>
                <gmd:geographicIdentifier>
                  <gmd:MD_Identifier>
                    <gmd:code>
                      <gco:CharacterString>Juan de Fuca Ridge flank CORKs, 47N/127W</gco:CharacterString>
                    </gmd:code>
                  </gmd:MD_Identifier>
                </gmd:geographicIdentifier>
              </gmd:EX_GeographicDescription>
            </gmd:geographicElement>
          </gmd:EX_Extent>
        </gmd:extent></gmd:MD_DataIdentification>
  </gmd:identificationInfo>
  <gmd:contentInfo>
    <gmd:MD_FeatureCatalogueDescription>
      <gmd:includedWithDataset>
        <gco:Boolean>0</gco:Boolean>
      </gmd:includedWithDataset>
      <gmd:featureCatalogueCitation>
        <gmd:CI_Citation>
          <gmd:title>
            <gco:CharacterString>BCO-DMO catalogue of parameters from Collection of subsurface bacteria Nitrospirota and Nitrospinota genome data including IMG and NCBI accessions for sequence datasets in June 2021 (Slow Life in Crust project)</gco:CharacterString>
          </gmd:title>
          <gmd:date gco:nilReason="unknown"/>
          <gmd:citedResponsibleParty>
            <gmd:CI_ResponsibleParty>
  <gmd:organisationName>
    <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/affiliation/191.rdf" xlink:actuate="onRequest">Biological and Chemical Oceanography Data Management Office (BCO-DMO)</gmx:Anchor>
  </gmd:organisationName>
  <gmd:contactInfo>
    <gmd:CI_Contact>
		  <gmd:phone>
		    <gmd:CI_Telephone>
				  <gmd:voice>
				    <gco:CharacterString>Unavailable</gco:CharacterString>
				  </gmd:voice>
				  <gmd:facsimile>
				    <gco:CharacterString>508-289-2009</gco:CharacterString>
				  </gmd:facsimile>
				</gmd:CI_Telephone>
		  </gmd:phone>
		  <gmd:address>
		    <gmd:CI_Address>
				  <gmd:deliveryPoint>
				    <gco:CharacterString>WHOI MS#36</gco:CharacterString>
				  </gmd:deliveryPoint>
				  <gmd:city>
				    <gco:CharacterString>Woods Hole</gco:CharacterString>
				  </gmd:city>
				  <gmd:administrativeArea>
				    <gco:CharacterString>MA</gco:CharacterString>
				  </gmd:administrativeArea>
				  <gmd:postalCode>
				    <gco:CharacterString>02543</gco:CharacterString>
				  </gmd:postalCode>
				  <gmd:country>
				    <gco:CharacterString>USA</gco:CharacterString>
				  </gmd:country>
				  <gmd:electronicMailAddress>
				    <gco:CharacterString>info@bco-dmo.org</gco:CharacterString>
				  </gmd:electronicMailAddress>
		    </gmd:CI_Address>
		  </gmd:address>
      <gmd:onlineResource>
          <gmd:CI_OnlineResource>
            <gmd:linkage>
              <gmd:URL>http://www.bco-dmo.org</gmd:URL>
            </gmd:linkage>
          </gmd:CI_OnlineResource>
        </gmd:onlineResource>
		  <gmd:hoursOfService>
        <gco:CharacterString>Monday - Friday 8:00am - 5:00pm</gco:CharacterString>
      </gmd:hoursOfService>
		  <gmd:contactInstructions>
		    <gco:CharacterString>For questions regarding this resource, please contact BCO-DMO via the email address provided.</gco:CharacterString>
		  </gmd:contactInstructions>
		</gmd:CI_Contact>
  </gmd:contactInfo>
  <gmd:role>
    <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact"  codeSpace="007">pointOfContact</gmd:CI_RoleCode>
  </gmd:role>
</gmd:CI_ResponsibleParty>
          </gmd:citedResponsibleParty>
          <gmd:otherCitationDetails>
            <gco:CharacterString>
            http://lod.bco-dmo.org/id/dataset-parameter/947368.rdf
	Name: ID
	Units: unitless
	Description: &lt;p&gt;Name of sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947369.rdf
	Name: IMG_genome_id
	Units: unitless
	Description: &lt;p&gt;IMG genome assembly ID (Integrated Microbial Genomes and Metagenomes)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947370.rdf
	Name: GenBank_assembly
	Units: unitless
	Description: &lt;p&gt;NCBI genome assembly accession (National Center for Biotechnology Information)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947371.rdf
	Name: Sample
	Units: unitless
	Description: &lt;p&gt;Submitter's original NCBI BioSample accession (National Center for Biotechnology Information)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947372.rdf
	Name: Corrected_BioSample
	Units: unitless
	Description: &lt;p&gt;NCBI BioSample accession retrieved with NCBI API call using NCBI Genome assembly accession (National Center for Biotechnology Information)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947373.rdf
	Name: BioProject
	Units: unitless
	Description: &lt;p&gt;NCBI BioProject accession (National Center for Biotechnology Information)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947374.rdf
	Name: release_date
	Units: unitless
	Description: &lt;p&gt;Release date for genome to be publicly accessible&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947375.rdf
	Name: last_updated_date
	Units: unitless
	Description: &lt;p&gt;Last date BioSample metadata was updated&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947376.rdf
	Name: publication_date
	Units: unitless
	Description: &lt;p&gt;Date that genome assembly was submitted to either IMG (Integrated Microbial Genomes and Metagenomes) or NCBI (National Center for Biotechnology Information)&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947377.rdf
	Name: Domain
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic domain of the sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947378.rdf
	Name: Phylum
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic phylum of the sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947379.rdf
	Name: Class
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic class of the sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947380.rdf
	Name: Order
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic order of the sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947381.rdf
	Name: Family
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic family of the sequence data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947382.rdf
	Name: Genus
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic genus of the sequence data. Blank if could not be identified.&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947383.rdf
	Name: Species
	Units: unitless
	Description: &lt;p&gt;Taxonomic phylogenetic species of the sequence data. Blank if could not be identified.&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947384.rdf
	Name: NCBI_organism_taxid
	Units: unitless
	Description: &lt;p&gt;NCBI taxon id of organism&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947385.rdf
	Name: Isolation_Source
	Units: unitless
	Description: &lt;p&gt;descriptive category of environment where data originated from&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947386.rdf
	Name: IsolationPlot
	Units: unitless
	Description: &lt;p&gt;Grouping of Isolation Source into predefined categories for plotting&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947387.rdf
	Name: Location
	Units: unitless
	Description: &lt;p&gt;Descriptive location where sequence data originated&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947388.rdf
	Name: Coordinates
	Units: degrees
	Description: &lt;p&gt;Downloaded latitude and longitude data from NCBI&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947389.rdf
	Name: Latitude
	Units: decimal degrees
	Description: &lt;p&gt;latitude of sample, south is negative&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947390.rdf
	Name: Longitude
	Units: decimal degrees
	Description: &lt;p&gt;longitude of sample, west is negative&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947391.rdf
	Name: Completeness
	Units: unitless
	Description: &lt;p&gt;Estimated genome completeness&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947392.rdf
	Name: Contamination
	Units: unitless
	Description: &lt;p&gt;Estimated contamination of genome data&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947393.rdf
	Name: Genome_Size_bp
	Units: basepairs
	Description: &lt;p&gt;Length of genome in basepairs&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947394.rdf
	Name: Estimated_Genome_Length
	Units: unitless
	Description: &lt;p&gt;basepairs length of contig&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947395.rdf
	Name: num_predicted_genes
	Units: unitless
	Description: &lt;p&gt;The number of predicted genes in the contig&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947396.rdf
	Name: GC
	Units: unitless
	Description: &lt;p&gt;percentage of GC basepairs out of all basepairs&lt;/p&gt; 
http://lod.bco-dmo.org/id/dataset-parameter/947397.rdf
	Name: Coding_density
	Units: unitless
	Description: &lt;p&gt;percent of contig with genomic information in a known gene&lt;/p&gt; 
</gco:CharacterString>
          </gmd:otherCitationDetails>
        </gmd:CI_Citation>
      </gmd:featureCatalogueCitation>
      <gmd:featureCatalogueCitation>
        <gmd:CI_Citation>
          <gmd:title>
            <gco:CharacterString>GB/NERC/BODC &gt; British Oceanographic Data Centre, Natural Environment Research Council, United Kingdom</gco:CharacterString>
          </gmd:title>
          <gmd:date gco:nilReason="unknown"/>
          <gmd:citedResponsibleParty xlink:href="https://data.noaa.gov/docucomp/3D8FE2C9DA324D40939C8D007587E011" xlink:title="GB/NERC/BODC &gt; British Oceanographic Data Centre, Natural Environment Research Council, United Kingdom"/>
          <gmd:otherCitationDetails>
            <gco:CharacterString>
            </gco:CharacterString>
          </gmd:otherCitationDetails>
        </gmd:CI_Citation>
      </gmd:featureCatalogueCitation>
    </gmd:MD_FeatureCatalogueDescription>
  </gmd:contentInfo>
  <gmd:distributionInfo>
    <gmd:MD_Distribution>
      <gmd:distributor>
        <gmd:MD_Distributor>
          <gmd:distributorContact>
            <gmd:CI_ResponsibleParty>
  <gmd:organisationName>
    <gmx:Anchor xlink:href="http://lod.bco-dmo.org/id/affiliation/191.rdf" xlink:actuate="onRequest">Biological and Chemical Oceanography Data Management Office (BCO-DMO)</gmx:Anchor>
  </gmd:organisationName>
  <gmd:contactInfo>
    <gmd:CI_Contact>
		  <gmd:phone>
		    <gmd:CI_Telephone>
				  <gmd:voice>
				    <gco:CharacterString>Unavailable</gco:CharacterString>
				  </gmd:voice>
				  <gmd:facsimile>
				    <gco:CharacterString>508-289-2009</gco:CharacterString>
				  </gmd:facsimile>
				</gmd:CI_Telephone>
		  </gmd:phone>
		  <gmd:address>
		    <gmd:CI_Address>
				  <gmd:deliveryPoint>
				    <gco:CharacterString>WHOI MS#36</gco:CharacterString>
				  </gmd:deliveryPoint>
				  <gmd:city>
				    <gco:CharacterString>Woods Hole</gco:CharacterString>
				  </gmd:city>
				  <gmd:administrativeArea>
				    <gco:CharacterString>MA</gco:CharacterString>
				  </gmd:administrativeArea>
				  <gmd:postalCode>
				    <gco:CharacterString>02543</gco:CharacterString>
				  </gmd:postalCode>
				  <gmd:country>
				    <gco:CharacterString>USA</gco:CharacterString>
				  </gmd:country>
				  <gmd:electronicMailAddress>
				    <gco:CharacterString>info@bco-dmo.org</gco:CharacterString>
				  </gmd:electronicMailAddress>
		    </gmd:CI_Address>
		  </gmd:address>
      <gmd:onlineResource>
          <gmd:CI_OnlineResource>
            <gmd:linkage>
              <gmd:URL>http://www.bco-dmo.org</gmd:URL>
            </gmd:linkage>
          </gmd:CI_OnlineResource>
        </gmd:onlineResource>
		  <gmd:hoursOfService>
        <gco:CharacterString>Monday - Friday 8:00am - 5:00pm</gco:CharacterString>
      </gmd:hoursOfService>
		  <gmd:contactInstructions>
		    <gco:CharacterString>For questions regarding this resource, please contact BCO-DMO via the email address provided.</gco:CharacterString>
		  </gmd:contactInstructions>
		</gmd:CI_Contact>
  </gmd:contactInfo>
  <gmd:role>
    <gmd:CI_RoleCode codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact"  codeSpace="007">pointOfContact</gmd:CI_RoleCode>
  </gmd:role>
</gmd:CI_ResponsibleParty>
          </gmd:distributorContact>
        </gmd:MD_Distributor>
      </gmd:distributor>
      <gmd:transferOptions>
          <gmd:MD_DigitalTransferOptions>
          <gmd:transferSize>
              <gco:Real>160703</gco:Real>
            </gmd:transferSize><gmd:onLine>
            <gmd:CI_OnlineResource>
              <gmd:linkage><gmd:URL>https://darchive.mblwhoilibrary.org/server/api/core/bitstreams/9652e9e2-6e0e-474a-b485-bda1fa803a06/content</gmd:URL></gmd:linkage>              <gmd:name>
                <gco:CharacterString></gco:CharacterString>
              </gmd:name><gmd:function>
                <gmd:CI_OnLineFunctionCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#CI_OnLineFunctionCode" codeListValue="download">download</gmd:CI_OnLineFunctionCode>
              </gmd:function>
            </gmd:CI_OnlineResource>
          </gmd:onLine>
        </gmd:MD_DigitalTransferOptions>
      </gmd:transferOptions>
              <gmd:transferOptions>
        <gmd:MD_DigitalTransferOptions>
        <gmd:onLine>
            <gmd:CI_OnlineResource>
              <gmd:linkage>
                <gmd:URL>https://doi.org/10.26008/1912/bco-dmo.933610.1</gmd:URL>
              </gmd:linkage>
              <gmd:function>
                <gmd:CI_OnLineFunctionCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#CI_OnLineFunctionCode" codeListValue="download">download</gmd:CI_OnLineFunctionCode>
              </gmd:function>
            </gmd:CI_OnlineResource>
          </gmd:onLine>
        <gmd:offLine>
            <gmd:MD_Medium>
              <gmd:name>
                 <gmd:MD_MediumNameCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#MD_MediumNameCode" codeListValue="onLine">onLine</gmd:MD_MediumNameCode>
              </gmd:name>
            </gmd:MD_Medium>
          </gmd:offLine>
        </gmd:MD_DigitalTransferOptions>
      </gmd:transferOptions>
    </gmd:MD_Distribution>
  </gmd:distributionInfo>
  <gmd:dataQualityInfo>
    <gmd:DQ_DataQuality>
      <gmd:scope>
        <gmd:DQ_Scope>
          <gmd:level>
            <gmd:MD_ScopeCode codeList="https://data.noaa.gov/resources/iso19139/schema/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="dataset">dataset</gmd:MD_ScopeCode>
          </gmd:level>
        </gmd:DQ_Scope>
      </gmd:scope>
            <gmd:lineage>
        <gmd:LI_Lineage>
          <gmd:processStep xlink:title="Methods and Sampling">
            <gmd:LI_ProcessStep>
              <gmd:description>
                <gco:CharacterString>&lt;p&gt;&lt;em&gt;Genomic dataset collection, curation and quality control: &lt;/em&gt;This study used publicly available genome assemblies. Existing publicly available genome assemblies were downloaded from the National Center for Biotechnology Information (NCBI) and the Integrated Microbial Genomes (IMG) database of the U.S. Department of Energy’s Joint Genome Institute in June 2021. The Genome Taxonomy Database (GTDB) website (release 202) was used to access lists of NCBI assembly accession numbers for the following GTDB-assigned phyla: &lt;em&gt;Nitrospinota&lt;/em&gt;, &lt;em&gt;Nitrospinota_A&lt;/em&gt; (now called &lt;em&gt;Tectomicrobia&lt;/em&gt;), &lt;em&gt;Nitrospinota_B&lt;/em&gt;, &lt;em&gt;Nitrospirota&lt;/em&gt;, &lt;em&gt;Nitrospirota_A&lt;/em&gt; (&lt;em&gt;Leptospirilla&lt;/em&gt;). The IMG assemblies were found using the same GTDB taxonomy classifier using the search function on the IMG website. IMG metagenome assemblies that were designated as “public” and “published” were also downloaded for these phyla. Duplicate entries between IMG and NCBI were manually removed.&lt;/p&gt;

&lt;p&gt;&amp;nbsp;&lt;/p&gt;</gco:CharacterString>
              </gmd:description>
              <gmd:source>
                <gmd:LI_Source>
                  <gmd:sourceCitation>
                    <gmd:CI_Citation>
                      <gmd:title>
                        <gco:CharacterString>Specified by the Principal Investigator(s)</gco:CharacterString>
                      </gmd:title>
                      <gmd:date gco:nilReason="unknown"/>
                    </gmd:CI_Citation>
                  </gmd:sourceCitation>
                </gmd:LI_Source>
              </gmd:source>
            </gmd:LI_ProcessStep>
          </gmd:processStep>
          <gmd:processStep xlink:title="Data Processing Description">
            <gmd:LI_ProcessStep>
              <gmd:description>
                <gco:CharacterString>&lt;p&gt;Quality control of the assemblies was performed using the CheckM qa workflow (v 1.07) to remove genomes with &amp;lt;50% genome completion and &amp;gt;10% sequence contamination, leaving genomes that fall within the MIMAG categories “medium” (&amp;gt;50% completion, &amp;lt;10% contamination) and “high” (&amp;gt;90% completion, &amp;lt;5% contamination). These resulting genomes were dereplicated with dRep, using default parameters, to remove nearly-identical assemblies. All genomes were then classified using the GTDB-tk classifier tool (v1.5.0, r202). Polyphyletic groups that were once considered a part of &lt;em&gt;Nitrospirota&lt;/em&gt; and &lt;em&gt;Nitrospinota&lt;/em&gt; (i.e., &lt;em&gt;Nitrospirota_A&lt;/em&gt; (&lt;em&gt;Leptospirilla&lt;/em&gt;), &lt;em&gt;Nitrospinota_A&lt;/em&gt; (&lt;em&gt;Tectomicrobia&lt;/em&gt;) and &lt;em&gt;Nitrospinota_B&lt;/em&gt;) were included only in the phylogenomic trees. These groups were not included in the gene cluster based functional analyses. All code to recreate these processes is available at https://github.com/ts-dangelo/bioinformatic_scripts_python.&lt;/p&gt;

&lt;p&gt;&amp;nbsp;&lt;/p&gt;</gco:CharacterString>
              </gmd:description>
              <gmd:source>
                <gmd:LI_Source>
                  <gmd:sourceCitation>
                    <gmd:CI_Citation>
                      <gmd:title>
                        <gco:CharacterString>Specified by the Principal Investigator(s)</gco:CharacterString>
                      </gmd:title>
                      <gmd:date gco:nilReason="unknown"/>
                    </gmd:CI_Citation>
                  </gmd:sourceCitation>
                </gmd:LI_Source>
              </gmd:source>
            </gmd:LI_ProcessStep>
          </gmd:processStep>
        <gmd:processStep xlink:title="BCO-DMO Data Processing Description">
              <gmd:LI_ProcessStep>
                <gmd:description>
                  <gco:CharacterString>Summary of processing on the submitted dataset by the dataset's BCO-DMO data manager.

1. Modified submitted accessions file

Fixed two shifted cell values in the submitted file &amp;quot;Supplemental Data 1.csv&amp;quot; and saved it as &amp;quot;Supplemental Data 1 fixed.csv&amp;quot; by using Excel.

The original file has a row number 333 with some values shifted into the wrong columns. Part of an &amp;quot;IsolationPlot&amp;quot; value was shifted to the right into the &amp;quot;Sample&amp;quot; column. And the “Sample” value was shifted to the right into the “Location” column. This was fixed by cutting the value in the “Sample” column and copying it into the &amp;quot;IsolationPlot&amp;quot; column for a final value of “saline water (ENVO:00002010) including plankton (ENVO:xxxxxxxx) “ and then cutting the value SAMN06450621 from the “Location” column and copying it into the &amp;quot;Sample&amp;quot; column.


2. Updated this fixed accessions file with IMG and NCBI metadata

Python code in Jupyter notebooks was created by the data manager and run to add metadata to the submitted accessions file. The scripts were run using the fixed file &amp;quot;Supplemental Data 1 fixed.csv&amp;quot; as input.

Data manager GitHub code repo (last commit = 7e8e2462a on 12/30/2024 updating docs)
https://github.com/BCODMO/DM_scripts_by_dataset/tree/master/Orcutt_accessions/933610

Code run on 2024-12-29 using the following two Jupyter notebooks

https://github.com/BCODMO/DM_scripts_by_dataset/blob/master/Orcutt_accessions/933610/get_ncbi_accessions_with_api.ipynb

https://github.com/BCODMO/DM_scripts_by_dataset/blob/master/Orcutt_accessions/933610/create_updated_supplemental_dataset.ipynb

The code and processing procedure are documented in markdown files in the GitHub repository
https://github.com/BCODMO/DM_scripts_by_dataset/blob/master/Orcutt_accessions/933610/README.md

https://github.com/BCODMO/DM_scripts_by_dataset/blob/master/Orcutt_accessions/933610/summary_of_processing_steps.md


Eight new columns were added using python Jupyter notebooks run on 12/29/2024 with the output saved to the file named “updated_pi_supplemental_table.csv”.

columns added to the fixed file &amp;quot;Supplemental Data 1 fixed.csv&amp;quot;: IMG_genome_id, BioProject, Corrected_BioSample, GenBank_assembly, release_date, last_updated_date, publication_date, ncbi_taxon_id

column definitions

IMG_genome_id: IMG genome assembly id (extracted from values in “ID” column)
BioProject: NCBI BioProject accession
Corrected_BioSample: correct NCBI BioSample accession
GenBank_assembly: NCBI GenBank genome assembly accession
release_date: Date genome assembly was released to the public
last_updated_date: Last date BioSample was updated on NCBI
publication_date: Date genome assembly submitted to either IMG or NCBI
ncbi_taxon_id: NCBI taxon id of the sampled organism


Steps to retrieve extra metadata

Used a search on the IMG website and python Jupyter notebooks to add NCBI accessions, dates, and NCBI taxon id metadata columns to provide more context to the submitted file.

There are two sources for the new metadata. One source is a search query on the IMG website (Integrated Microbial Genomes and Microbiomes https://img.jgi.doe.gov/) and the other source is NCBI API calls retrieving JSON reports.

The goal was to add corresponding BioProject accessions for each row in the ‘ID’ column of a submitted accessions file. 

In the process of gathering supporting metadata by the dataset's BCO-DMO data manager, 15 BioSample values retrieved with the NCBI API using NCBI genome assembly accessions were found to be different than those in the submitted file’s “Sample” column, which contains BioSample values. The 15 new BioSample values in the column &amp;quot;Corrected_BioSample&amp;quot; in the final BCO-DMO dataset 933610_v1_nitrospirota_and_nitrospinota_genomes.csv were manually checked on their corresponding NCBI web pages. It was found that the BioSample metadata and the corresponding NCBI genome assembly value confirmed the BioSample values retrieved via NCBI API calls were correct. The original 15 BioSample values were manually checked on their corresponding NCBI web pages, and the BioSample metadata and NCBI genome assembly accession for that BioSample did not match those listed in the submitted file.

The incorrect BioSample values in the &quot;Sample&quot; column are the following: 
SAMN03222684, SAMN03222684, SAMN02862043, SAMN03222686, SAMN03222686, SAMEA3140934, SAMEA3140928, SAMN01922995, SAMN03203005, SAMN03203000, SAMN10411419, SAMN12518157, SAMN04315473, SAMN06659639, SAMN07631077

The correct BioSample values are found in the column &quot;Corrected_BioSample&quot; in the primary dataset file 933610_v1_nitrospirota_and_nitrospinota_genomes.csv.

The new columns were populated using results from a query on the IMG website (Integrated Microbial Genomes and Microbiomes at https://img.jgi.doe.gov/) and python Jupyter notebooks that used NCBI API calls and a series of joins with the fixed submitted accessions dataset.

To query the IMG website, a list of IMG genome ids was created using IMG id values in the “ID” column of the file &quot;Supplemental Data 1 fixed.csv&quot;. The IMG genome ids in the “ID” column are prefixed with “IMG_” and some are suffixed with an underscore followed by a number. 

IMG query URL: https://img.jgi.doe.gov/cgi-bin/m/main.cgi?section=FindGenomes&amp;page=genomeSearch

Downloaded the query results table from IMG (Integrated Microbial Genomes and Microbiomes https://img.jgi.doe.gov/) where the query used the list of IMG genome ids from the “ID” column. The results table contained NCBI accessions and other metadata associated with IMG genome ids.

Created a python Jupyter notebook to perform NCBI API calls using BioSample accessions, NCBI assembly accessions, and taxon names to retrieve data reports in JSON format. The JSON reports contained values for BioSample, BioProject, NCBI assembly accession, release date, last updated date, publication date, and NCBI taxon id.

The NCBI API calls used the NCBI version 2 REST API which is explained here https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/rest-api/. The following three API endpoints were used:

/genome/accession/{accession}/dataset_report
/biosample/accession/{accession}/biosample_report
/genome/taxon/{taxon_name}/dataset_report

The API endpoint /genome/accession/{accession}/dataset_report was called with NCBI genome assembly accessions from the “ID” column of the accessions dataset table.

The API endpoint /biosample/accession/{accession}/biosample_report was called with NCBI BioSample accessions from the “Sample” column of the accessions dataset table.

The API endpoint /genome/taxon/{taxon_name}/dataset_report was called using the phylum taxon names of Nitrospinota and Nitrospirota.

All the metadata results from the NCBI API endpoints were combined into one file with duplicates removed.

Created a python Jupyter notebook, to be run after obtaining the NCBI accessions and IMG genome ids, that retrieves and adds 8 extra metadata columns to the fixed submitted accessions dataset.

This notebook performed a series of joins on IMG genome ids, NCBI BioSamples and NCBI genome assembly accessions. 

The first step was joining the IMG genome ids and their associated NCBI metadata onto the starting accessions dataset to create a new table.

The next steps were to perform a series of joins onto this new table.

Joined this new table with a table of NCBI accession values, which came from 3 NCBI API calls as discussed above, on the NCBI genome assembly values to create a new table.

A column containing NCBI BioSample values was created from this join. This column contains 15 BioSample values that are different from those in the original submitted accessions table. 

A new column named ‘Fixed_BioSample’ was created by merging NCBI BioSample values found with the IMG query and NCBI BioSample values found with NCBI API calls.

Using the BioSample values in the ‘Fixed_BioSample’ column created by the Jupyter notebook, the NCBI API was called again and the new metadata values found for these corrected BioSample values were used in the final processed dataset. This retrieved metadata was used in preference to any previously obtained metadata. Any metadata values not retrieved using the ‘Fixed_BioSample’ column values were filled in with the original metadata values.

Renamed columns in the table with new metadata: 'Fixed_BioSample' was renamed 'Corrected_BioSample'. This table was saved to a file named “updated_pi_supplemental_table.csv”.


3. Processed the file “updated_pi_supplemental_table.csv” with the BCO-DMO dataset processing tool named laminar

Renamed columns to conform with the BCO-DMO parameter naming convention of replacing spaces with underscores, and starting parameter names with an alphabetical character. Renamed the parameter “# predicted genes” to “num_predicted_genes”.

Renamed the parameter ncbi_taxon_id to NCBI_organism_taxid.

Converted UTC date parameters with the format %Y-%m-%dT%H:%M:%S.%f to the ISO UTC datetime format of %Y-%m-%dT%H:%M:%SZ.

The organism taxon id is an integer, so removed the trailing decimal 0 to convert it into an integer. 

Reordered the parameters so that genome metadata is towards the end of the dataset table and accessions metadata is towards the start.

Saved this updated table as the final dataset named 933610_v1_nitrospirota_and_nitrospinota_genomes.csv</gco:CharacterString>
                </gmd:description>
                <!-- Lineage source: cited by title only; the citation date is nil (reason: unknown). -->
                <gmd:source>
                  <gmd:LI_Source>
                    <gmd:sourceCitation>
                      <gmd:CI_Citation>
                        <gmd:title><gco:CharacterString>Specified by BCO-DMO Data Managers</gco:CharacterString></gmd:title>
                        <gmd:date gco:nilReason="unknown"/>
                      </gmd:CI_Citation>
                    </gmd:sourceCitation>
                  </gmd:LI_Source>
                </gmd:source>
              </gmd:LI_ProcessStep>
            </gmd:processStep>
          </gmd:LI_Lineage>
      </gmd:lineage>
   </gmd:DQ_DataQuality>
  </gmd:dataQualityInfo>
  <!-- Metadata maintenance: update frequency is "asNeeded"; the contact below has the role "pointOfContact". -->
  <gmd:metadataMaintenance>
    <gmd:MD_MaintenanceInformation>
      <gmd:maintenanceAndUpdateFrequency>
        <gmd:MD_MaintenanceFrequencyCode
            codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_MaintenanceFrequencyCode"
            codeListValue="asNeeded"
            codeSpace="009">asNeeded</gmd:MD_MaintenanceFrequencyCode>
      </gmd:maintenanceAndUpdateFrequency>
      <gmd:maintenanceNote>
        <gco:CharacterString>7.x-1.1</gco:CharacterString>
      </gmd:maintenanceNote>
      <gmd:contact>
        <gmd:CI_ResponsibleParty>
          <gmd:organisationName>
            <gmx:Anchor
                xlink:href="http://lod.bco-dmo.org/id/affiliation/191.rdf"
                xlink:actuate="onRequest">Biological and Chemical Oceanography Data Management Office (BCO-DMO)</gmx:Anchor>
          </gmd:organisationName>
          <gmd:contactInfo>
            <gmd:CI_Contact>
              <gmd:phone>
                <gmd:CI_Telephone>
                  <gmd:voice>
                    <gco:CharacterString>Unavailable</gco:CharacterString>
                  </gmd:voice>
                  <gmd:facsimile>
                    <gco:CharacterString>508-289-2009</gco:CharacterString>
                  </gmd:facsimile>
                </gmd:CI_Telephone>
              </gmd:phone>
              <gmd:address>
                <gmd:CI_Address>
                  <gmd:deliveryPoint>
                    <gco:CharacterString>WHOI MS#36</gco:CharacterString>
                  </gmd:deliveryPoint>
                  <gmd:city>
                    <gco:CharacterString>Woods Hole</gco:CharacterString>
                  </gmd:city>
                  <gmd:administrativeArea>
                    <gco:CharacterString>MA</gco:CharacterString>
                  </gmd:administrativeArea>
                  <gmd:postalCode>
                    <gco:CharacterString>02543</gco:CharacterString>
                  </gmd:postalCode>
                  <gmd:country>
                    <gco:CharacterString>USA</gco:CharacterString>
                  </gmd:country>
                  <gmd:electronicMailAddress>
                    <gco:CharacterString>info@bco-dmo.org</gco:CharacterString>
                  </gmd:electronicMailAddress>
                </gmd:CI_Address>
              </gmd:address>
              <gmd:onlineResource>
                <gmd:CI_OnlineResource>
                  <gmd:linkage>
                    <gmd:URL>http://www.bco-dmo.org</gmd:URL>
                  </gmd:linkage>
                </gmd:CI_OnlineResource>
              </gmd:onlineResource>
              <gmd:hoursOfService>
                <gco:CharacterString>Monday - Friday 8:00am - 5:00pm</gco:CharacterString>
              </gmd:hoursOfService>
              <gmd:contactInstructions>
                <gco:CharacterString>For questions regarding this resource, please contact BCO-DMO via the email address provided.</gco:CharacterString>
              </gmd:contactInstructions>
            </gmd:CI_Contact>
          </gmd:contactInfo>
          <gmd:role>
            <gmd:CI_RoleCode
                codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode"
                codeListValue="pointOfContact"
                codeSpace="007">pointOfContact</gmd:CI_RoleCode>
          </gmd:role>
        </gmd:CI_ResponsibleParty>
      </gmd:contact>
    </gmd:MD_MaintenanceInformation>
  </gmd:metadataMaintenance>
  <gmi:acquisitionInformation>
    <gmi:MI_AcquisitionInformation>
      <!-- No instrument is described; the element is nil with reason "unknown". -->
      <gmi:instrument gco:nilReason="unknown"/>
    </gmi:MI_AcquisitionInformation>
  </gmi:acquisitionInformation>
</gmi:MI_Metadata>
