diff --git a/templates/ena_upload_tool/archive/library construction.json b/templates/ena_upload_tool/archive/library construction.json new file mode 100644 index 0000000..bdc2e2f --- /dev/null +++ b/templates/ena_upload_tool/archive/library construction.json @@ -0,0 +1,249 @@ +{ + "metadata": { + "name": "library construction ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 4, + "temporary_name": "4_ena_custom_library_construction", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "library construction", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - material" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "SEEK Sample Multi", + "required": "true", + "isaTag": null + }, + { + "iri": null, + "name": "library construction", + "description": "type of assay or experimental step performed.", + "required": true, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOPtittle + "isaTag": "protocol" + }, + { + "iri": null, + "name": "library_construction_protocol", + "description": "Free form text describing the protocol by which the sequencing library was constructed.", + "dataType": "String", + "required": false, + "ontology": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "title", + "description": "Short text that can be used to call out experiment records in searches or in displays.", + "required": true, + "ontology": null, + "dataType": "String", + "CVList": null, + "isaTag": "otherMaterial_characteristic" + }, + { + "iri": null, + "name": "design_description", + "description": "The design of the library including details of how it was 
constructed. ", + "required": true, + "ontology": null, + "dataType": "String", + "CVList": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_source", + "description": "The LIBRARY_SOURCE specifies the type of source material that is being sequenced. (SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "GENOMIC", + "GENOMIC SINGLE CELL", + "TRANSCRIPTOMIC", + "TRANSCRIPTOMIC SINGLE CELL", + "METAGENOMIC", + "METATRANSCRIPTOMIC", + "SYNTHETIC", + "VIRAL RNA", + "OTHER" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_strategy", + "description": "Sequencing technique intended for this library (SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "WGS", + "WGA", + "WXS", + "RNA-Seq", + "ssRNA-seq", + "miRNA-Seq", + "ncRNA-Seq", + "FL-cDNA", + "EST", + "Hi-C", + "ATAC-seq", + "WCS", + "RAD-Seq", + "CLONE", + "POOLCLONE", + "AMPLICON", + "CLONEEND", + "FINISHING", + "ChIP-Seq", + "MNase-Seq", + "DNase-Hypersensitivity", + "Bisulfite-Seq", + "CTS", + "MRE-Seq", + "MeDIP-Seq", + "MBD-Seq", + "Tn-Seq", + "VALIDATION", + "FAIRE-seq", + "SELEX", + "RIP-Seq", + "ChIA-PET", + "Synthetic-Long-Read", + "Targeted-Capture", + "Tethered Chromatin Conformation Capture", + "OTHER", + "NOMe-Seq", + "ChM-Seq", + "GBS" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_selection", + "description": "Whether any method was used to select for or against, enrich, or screen the material being sequenced. 
(SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "RANDOM", + "PCR", + "RANDOM PCR", + "RT-PCR", + "HMPR", + "MF", + "repeat fractionation", + "size fractionation", + "MSLL", + "cDNA", + "cDNA_randomPriming", + "cDNA_oligo_dT", + "PolyA", + "Oligo-dT", + "Inverse rRNA", + "Inverse rRNA selection", + "ChIP", + "ChIP-Seq", + "MNase", + "DNase", + "Hybrid Selection", + "Reduced Representation", + "Restriction Digest", + "5-methylcytidine antibody", + "MBD2 protein methyl-CpG binding domain", + "CAGE", + "RACE", + "MDA", + "padlock probes capture method", + "other", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_layout", + "description": "specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified.", + "dataType": "Controlled Vocabulary", + "required": false, + "ontology": null, + "CVList": [ + "SINGLE", + "PAIRED" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "insert_size", + "description": "Insert size for paired reads.", + "dataType": "String", + "required": false, + "ontology": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_name", + "description": "Name given to the generated library", + "dataType": "String", + "title": true, + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "otherMaterial" + }, + { + "iri": null, + "name": "submission date", + "description": "Date in which experiment was submitted to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "otherMaterial_characteristic" + }, + { + "iri": null, + "name": "status", + "description": "Status of submission to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": [ + "add", + "added", + "modify", + "modified", + "cancel", + 
"cancelled", + "release", + "released" + ], + "isaTag": "otherMaterial_characteristic" + }, + { + "iri": null, + "name": "accession", + "description": "ENA experiment accession code. Provided by ENA after submission.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "otherMaterial_characteristic" + } + ] +} diff --git a/templates/ena_upload_tool/archive/nucleic acid extraction.json b/templates/ena_upload_tool/archive/nucleic acid extraction.json new file mode 100644 index 0000000..cc81296 --- /dev/null +++ b/templates/ena_upload_tool/archive/nucleic acid extraction.json @@ -0,0 +1,57 @@ +{ + "metadata": { + "name": "nucleic acid extraction ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 3, + "temporary_name": "3_ena_custom_nucleic_acid_extraction", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "nucleic acid extraction", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - material" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "SEEK Sample Multi", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "nucleic acid extraction", + "description": "type of assay or experimental step performed.", + "required": true, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOP title + "isaTag": "protocol" + }, + { + "iri": null, + "name": "sample volume or weight for DNA extraction", + "description": "Volume (mL) or weight (g) of sample processed for DNA extraction", + "required": false, + "ontology": null, + "dataType": "String", // Original regex "(0|((0\\.)|([1-9][0-9]*\\.?))[0-9]*)([Ee][+-]?[0-9]+)?", + 
"unit": null, //Original unit options ["mL","g"], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "Extract Name", + "description": "User-defined names for each portion of extracted material.", + "dataType": "String", + "title": true, + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "otherMaterial" + } + ] +} diff --git a/templates/ena_upload_tool/archive/nucleic acid sequencing.json b/templates/ena_upload_tool/archive/nucleic acid sequencing.json new file mode 100644 index 0000000..f993440 --- /dev/null +++ b/templates/ena_upload_tool/archive/nucleic acid sequencing.json @@ -0,0 +1,190 @@ +{ + "metadata": { + "name": "nucleic acid sequencing ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 5, + "temporary_name": "5_ena_custom_nucleic_acid_sequencing", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "nucleic acid sequencing", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - data file" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "SEEK Sample Multi", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "nucleic acid sequencing", + "description": "type of assay or experimental step performed.", + "required": true, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOPtittle + "isaTag": "protocol" + }, + { + "iri": null, + "name": "platform", + "description": "a parameter to report the sequencing instrument model and make", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "LS454", + "Illumina", + "HiSeq", + "NextSeq", + "PacBio", + "Themo Fisher Scientific", + 
"MGI Tech", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "instrument_model", + "description": "a parameter to report the sequencing instrument model and make", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "454 GS", + "454 GS 20", + "454 GS FLX", + "454 GS FLX+", + "454 GS FLX Titanium", + "454 GS Junior", + "HiSeq X Five", + "HiSeq X Ten", + "Illumina Genome Analyzer", + "Illumina Genome Analyzer II", + "Illumina Genome Analyzer IIx", + "Illumina HiScanSQ", + "Illumina HiSeq 1000", + "Illumina HiSeq 1500", + "Illumina HiSeq 2000", + "Illumina HiSeq 2500", + "Illumina HiSeq 3000", + "Illumina HiSeq 4000", + "Illumina iSeq 100", + "Illumina MiSeq", + "Illumina MiniSeq", + "Illumina NovaSeq 6000", + "NextSeq 500", + "NextSeq 550", + "PacBio RS", + "PacBio RS II", + "Sequel", + "Ion Torrent PGM", + "Ion Torrent Proton", + "Ion Torrent S5", + "Ion Torrent S5 XL", + "AB 3730xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 310 Genetic Analyzer", + "MinION", + "GridION", + "PromethION", + "BGISEQ-500", + "DNBSEQ-T7", + "DNBSEQ-G400", + "DNBSEQ-G50", + "DNBSEQ-G400 FAST", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "file_type", + "description": "The run data file model.", + "dataType": "String", + "required": true, + "ontology": null, + "CVList": [ + "bam", + "cram", + "fastq", + "oxfordnanopore_native", + "pacbio_hdf5", + "sff" + ], + "isaTag": "dataFile_comment" + }, + { + "iri": null, + "name": "file checksum", + "description": "The MD5 checksum of the file.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "dataFile_comment" + }, + { + "iri": null, + "name": "Raw Data File", + "description": "Name (or URI) of the raw data file generated by an assay.", + "dataType": 
"String", + "title": true, + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "dataFile" + }, + { + "iri": null, + "name": "submission date", + "description": "Date in which run was submitted to ENA.Date of submission to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "dataFile_comment" + }, + { + "iri": null, + "name": "status", + "description": "Status of submission to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": [ + "add", + "added", + "modify", + "modified", + "cancel", + "cancelled", + "release", + "released" + ], + "isaTag": "dataFile_comment" + }, + { + "iri": null, + "name": "accession", + "description": "ENA run accession code. Provided by ENA after submission.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "dataFile_comment" + } + ] +} diff --git a/templates/ena_upload_tool/archive/sample collection.json b/templates/ena_upload_tool/archive/sample collection.json new file mode 100644 index 0000000..61aecf6 --- /dev/null +++ b/templates/ena_upload_tool/archive/sample collection.json @@ -0,0 +1,45 @@ +{ + "metadata": { + "name": "sample collection ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 2, + "temporary_name": "2_ena_custom_sample_collection", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": null, + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "sample collection", + "repo_schema_id": "ERC000011.xml 2021", + "organism": "any", + "level": "study sample" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "SEEK Sample Multi", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "sample collection", + "description": "type of assay or experimental step performed.", + 
"dataType": "String", //need to be autofilled with name of selected SOP for that block + "required": true, + "ontology": null, + "CVList": null, //SOPtittle + "isaTag": "protocol" + }, + { + "iri": null, + "name": "Sample Name", + "description": "Name of the major output resulting from the application of the protocol.", + "dataType": "String", + "title": true, + "required": true, + "isaTag": "sample" + } + ] +} diff --git a/templates/ena_upload_tool/archive/sequence assembly.json b/templates/ena_upload_tool/archive/sequence assembly.json new file mode 100644 index 0000000..512f1b3 --- /dev/null +++ b/templates/ena_upload_tool/archive/sequence assembly.json @@ -0,0 +1,66 @@ +{ + "metadata": { + "name": "sequence assembly ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 6, + "temporary_name": "6_ena_custom_sequence_assembly", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "sequence assembly", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - data file" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "seek sample multi", + "required": "true", + "isaTag": null + }, + { + "iri": null, + "name": "sequence assembly", + "description": "type of assay or experimental step performed.", + "required": true, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOPtittle + "isaTag": "protocol" + }, + { + "iri": null, + "name": "Normalization Name", + "description": "User-defined name for each normalization applied", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "Data 
Transformation Name", + "description": "a data transformation", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "Derived Data File", + "description": "derived data such as an assembly file", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "dataFile" + } + ] +} diff --git a/templates/ena_upload_tool/archive/source all organisms.json b/templates/ena_upload_tool/archive/source all organisms.json new file mode 100644 index 0000000..77bf043 --- /dev/null +++ b/templates/ena_upload_tool/archive/source all organisms.json @@ -0,0 +1,53 @@ +{ + "metadata": { + "name": "source ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 1, + "temporary_name": "1_ena_custom_source", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": null, + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": null, + "repo_schema_id": "ERC000011.xml 2021", + "organism": "any", + "level": "study source" + }, + "data": [ + { + "iri": null, + "name": "Source Name", + "description": "Sources are considered as the starting biological material used in a study.", + "dataType": "String", + "title": true, + "required": true, + "isaTag": "source" + }, + { + "iri": null, + "name": "title", + "description": "Short text that can be used to call out sample records in search results or in displays.", + "dataType": "String", + "required": true, + "isaTag": "source_characteristic" + }, + { + "iri": null, + "name": "description", + "description": "Free-form text describing the sample, its origin, and its method of isolation.", + "dataType": "String", + "required": true, + "ontology": null, + "isaTag": "source_characteristic" + }, + { + "iri": null, + "name": "taxon_id", //First field on an ENA sample file. It is the number ID + "description": "NCBI Taxonomy Identifier number ID. 
This is appropriate for individual organisms and some environmental samples.", + "dataType": "Integer", + "required": true, + "ontology": null, + "isaTag": "source_characteristic" + } + ] +} diff --git a/templates/ena_upload_tool/ena_upload_tool_aggregated.json b/templates/ena_upload_tool/ena_upload_tool_aggregated.json new file mode 100644 index 0000000..82ac0e3 --- /dev/null +++ b/templates/ena_upload_tool/ena_upload_tool_aggregated.json @@ -0,0 +1,886 @@ +{ + "data": [ + { + "metadata": { + "name": "source ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 1, + "temporary_name": "1_ena_custom_source", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": null, + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": null, + "repo_schema_id": "ERC000011.xml 2021", + "organism": "any", + "level": "study source" + }, + "data": [ + { + "iri": null, + "name": "Source Name", + "description": "Sources are considered as the starting biological material used in a study.", + "dataType": "String", + "title": true, + "required": true, + "isaTag": "source" + }, + { + "iri": null, + "name": "geographic location (country and/or sea)", + "description": "The geographical origin of the sample as defined by the country or sea. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html).", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "Afghanistan", + "Albania", + "Algeria", + "American Samoa", + "Andorra", + "Angola", + "Anguilla", + "Antarctica", + "Antigua and Barbuda", + "Arctic Ocean", + "Argentina", + "Armenia", + "Aruba", + "Ashmore and Cartier Islands", + "Atlantic Ocean", + "Australia", + "Austria", + "Azerbaijan", + "Bahamas", + "Bahrain", + "Baker Island", + "Baltic Sea", + "Bangladesh", + "Barbados", + "Bassas da India", + "Belarus", + "Belgium", + "Belize", + "Benin", + "Bermuda", + "Bhutan", + "Bolivia", + "Borneo", + "Bosnia and Herzegovina", + "Botswana", + "Bouvet Island", + "Brazil", + "British Virgin Islands", + "Brunei", + "Bulgaria", + "Burkina Faso", + "Burundi", + "Cambodia", + "Cameroon", + "Canada", + "Cape Verde", + "Cayman Islands", + "Central African Republic", + "Chad", + "Chile", + "China", + "Christmas Island", + "Clipperton Island", + "Cocos Islands", + "Colombia", + "Comoros", + "Cook Islands", + "Coral Sea Islands", + "Costa Rica", + "Cote d'Ivoire", + "Croatia", + "Cuba", + "Curacao", + "Cyprus", + "Czech Republic", + "Democratic Republic of the Congo", + "Denmark", + "Djibouti", + "Dominica", + "Dominican Republic", + "East Timor", + "Ecuador", + "Egypt", + "El Salvador", + "Equatorial Guinea", + "Eritrea", + "Estonia", + "Ethiopia", + "Europa Island", + "Falkland Islands (Islas Malvinas)", + "Faroe Islands", + "Fiji", + "Finland", + "France", + "French Guiana", + "French Polynesia", + "French Southern and Antarctic Lands", + "Gabon", + "Gambia", + "Gaza Strip", + "Georgia", + "Germany", + "Ghana", + "Gibraltar", + "Glorioso Islands", + "Greece", + "Greenland", + "Grenada", + "Guadeloupe", + "Guam", + "Guatemala", + "Guernsey", + "Guinea", + "Guinea-Bissau", + "Guyana", + "Haiti", + "Heard Island and McDonald Islands", + "Honduras", + "Hong Kong", + "Howland Island", 
+ "Hungary", + "Iceland", + "India", + "Indian Ocean", + "Indonesia", + "Iran", + "Iraq", + "Ireland", + "Isle of Man", + "Israel", + "Italy", + "Jamaica", + "Jan Mayen", + "Japan", + "Jarvis Island", + "Jersey", + "Johnston Atoll", + "Jordan", + "Juan de Nova Island", + "Kazakhstan", + "Kenya", + "Kerguelen Archipelago", + "Kingman Reef", + "Kiribati", + "Kosovo", + "Kuwait", + "Kyrgyzstan", + "Laos", + "Latvia", + "Lebanon", + "Lesotho", + "Liberia", + "Libya", + "Liechtenstein", + "Lithuania", + "Luxembourg", + "Macau", + "Macedonia", + "Madagascar", + "Malawi", + "Malaysia", + "Maldives", + "Mali", + "Malta", + "Marshall Islands", + "Martinique", + "Mauritania", + "Mauritius", + "Mayotte", + "Mediterranean Sea", + "Mexico", + "Micronesia", + "Midway Islands", + "Moldova", + "Monaco", + "Mongolia", + "Montenegro", + "Montserrat", + "Morocco", + "Mozambique", + "Myanmar", + "Namibia", + "Nauru", + "Navassa Island", + "Nepal", + "Netherlands", + "New Caledonia", + "New Zealand", + "Nicaragua", + "Niger", + "Nigeria", + "Niue", + "Norfolk Island", + "North Korea", + "North Sea", + "Northern Mariana Islands", + "Norway", + "Oman", + "Pacific Ocean", + "Pakistan", + "Palau", + "Palmyra Atoll", + "Panama", + "Papua New Guinea", + "Paracel Islands", + "Paraguay", + "Peru", + "Philippines", + "Pitcairn Islands", + "Poland", + "Portugal", + "Puerto Rico", + "Qatar", + "Republic of the Congo", + "Reunion", + "Romania", + "Ross Sea", + "Russia", + "Rwanda", + "Saint Helena", + "Saint Kitts and Nevis", + "Saint Lucia", + "Saint Pierre and Miquelon", + "Saint Vincent and the Grenadines", + "Samoa", + "San Marino", + "Sao Tome and Principe", + "Saudi Arabia", + "Senegal", + "Serbia", + "Seychelles", + "Sierra Leone", + "Singapore", + "Sint Maarten", + "Slovakia", + "Slovenia", + "Solomon Islands", + "Somalia", + "South Africa", + "South Georgia and the South Sandwich Islands", + "South Korea", + "Southern Ocean", + "Spain", + "Spratly Islands", + "Sri Lanka", + "Sudan", + 
"Suriname", + "Svalbard", + "Swaziland", + "Sweden", + "Switzerland", + "Syria", + "Taiwan", + "Tajikistan", + "Tanzania", + "Tasman Sea", + "Thailand", + "Togo", + "Tokelau", + "Tonga", + "Trinidad and Tobago", + "Tromelin Island", + "Tunisia", + "Turkey", + "Turkmenistan", + "Turks and Caicos Islands", + "Tuvalu", + "USA", + "Uganda", + "Ukraine", + "United Arab Emirates", + "United Kingdom", + "Uruguay", + "Uzbekistan", + "Vanuatu", + "Venezuela", + "Viet Nam", + "Virgin Islands", + "Wake Island", + "Wallis and Futuna", + "West Bank", + "Western Sahara", + "Yemen", + "Zambia", + "Zimbabwe", + "not applicable", + "not collected", + "not provided", + "restricted access" + ], + "isaTag": "source_characteristic" + }, + { + "iri": null, + "name": "taxon_id", //First field on an ENA sample file. It is the number ID + "description": "NCBI Taxonomy Identifier number ID. This is appropriate for individual organisms and some environmental samples.", + "dataType": "Integer", + "required": true, + "ontology": null, + "isaTag": "source_characteristic" + } + ] + }, + { + "metadata": { + "name": "sample collection ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 2, + "temporary_name": "2_ena_custom_sample_collection", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": null, + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "sample collection", + "repo_schema_id": "ERC000011.xml 2021", + "organism": "any", + "level": "study sample" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "Registered Sample List", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "sample collection", + "description": "type of assay or experimental step performed.", + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "required": true, + 
"ontology": null, + "CVList": null, //SOPtittle + "isaTag": "protocol" + }, + { + "iri": null, + "name": "Sample Name", + "description": "Name of the major output resulting from the application of the protocol.", + "dataType": "String", + "title": true, + "required": true, + "isaTag": "sample" + }, + { + "iri": null, + "name": "title", + "description": "Short text that can be used to call out sample records in search results or in displays.", + "dataType": "String", + "required": true, + "isaTag": "sample_characteristic" + }, + { + "iri": null, + "name": "sample_description", + "description": "Free-form text describing the sample, its origin, and its method of isolation.", + "dataType": "String", + "required": true, + "ontology": null, + "isaTag": "sample_characteristic" + }, + { + "iri": null, + "name": "collection date", + "description": "Date of the sample collection", + "dataType": "ENA custom date", + "required": true, + "ontology": null, + "isaTag": "sample_characteristic" + }, + { + "iri": null, + "name": "accession", + "description": "ENA experiment accession code. 
Provided by ENA after submission.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "sample_characteristic" + }, + { + "iri": null, + "name": "submission_date", + "description": "Date on which the sample was submitted to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "sample_characteristic" + }, + { + "iri": null, + "name": "status", + "description": "Status of submission to ENA.", + "dataType": "Controlled Vocabulary", + "required": false, + "ontology": null, + "CVList": [ + "add", + "added", + "modify", + "modified", + "cancel", + "cancelled", + "released" + ], + "isaTag": "sample_characteristic" + } + ] + }, + { + "metadata": { + "name": "library construction ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 4, + "temporary_name": "4_ena_custom_library_construction", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "library construction", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - material" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "Registered Sample List", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "library construction", + "description": "Type of assay or experimental step performed.", + "required": true, + "title": false, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOP title + "isaTag": "other_material" + }, + { + "iri": null, + "name": "library_construction_protocol", + "description": "Free form text describing the protocol by which the sequencing library was constructed.", + "dataType": "String", + 
"required": false, + "ontology": null, + "isaTag": "protocol" + }, + { + "iri": null, + "name": "title", + "description": "Short text that can be used to call out experiment records in searches or in displays.", + "required": true, + "ontology": null, + "dataType": "String", + "CVList": null, + "isaTag": "other_material_characteristic" + }, + { + "iri": null, + "name": "design_description", + "description": "The design of the library including details of how it was constructed. ", + "required": false, + "ontology": null, + "dataType": "String", + "CVList": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_name", + "description": "Name of the library to be published in ENA", + "dataType": "String", + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "other_material_characteristic" + }, + { + "iri": null, + "name": "library_source", + "description": "The LIBRARY_SOURCE specifies the type of source material that is being sequenced. (SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "GENOMIC", + "GENOMIC SINGLE CELL", + "TRANSCRIPTOMIC", + "TRANSCRIPTOMIC SINGLE CELL", + "METAGENOMIC", + "METATRANSCRIPTOMIC", + "SYNTHETIC", + "VIRAL RNA", + "OTHER" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_strategy", + "description": "Sequencing technique intended for this library (SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "WGS", + "WGA", + "WXS", + "RNA-Seq", + "ssRNA-seq", + "miRNA-Seq", + "ncRNA-Seq", + "FL-cDNA", + "EST", + "Hi-C", + "ATAC-seq", + "WCS", + "RAD-Seq", + "CLONE", + "POOLCLONE", + "AMPLICON", + "CLONEEND", + "FINISHING", + "ChIP-Seq", + "MNase-Seq", + "DNase-Hypersensitivity", + "Bisulfite-Seq", + "CTS", + "MRE-Seq", + "MeDIP-Seq", + "MBD-Seq", + "Tn-Seq", + "VALIDATION", + "FAIRE-seq", + "SELEX", + "RIP-Seq", + "ChIA-PET", + 
"Synthetic-Long-Read", + "Targeted-Capture", + "Tethered Chromatin Conformation Capture", + "OTHER", + "NOMe-Seq", + "ChM-Seq", + "GBS" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_selection", + "description": "Whether any method was used to select for or against, enrich, or screen the material being sequenced. (SRA 1.2 documentation)", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "RANDOM", + "PCR", + "RANDOM PCR", + "RT-PCR", + "HMPR", + "MF", + "repeat fractionation", + "size fractionation", + "MSLL", + "cDNA", + "cDNA_randomPriming", + "cDNA_oligo_dT", + "PolyA", + "Oligo-dT", + "Inverse rRNA", + "Inverse rRNA selection", + "ChIP", + "ChIP-Seq", + "MNase", + "DNase", + "Hybrid Selection", + "Reduced Representation", + "Restriction Digest", + "5-methylcytidine antibody", + "MBD2 protein methyl-CpG binding domain", + "CAGE", + "RACE", + "MDA", + "padlock probes capture method", + "other", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "library_layout", + "description": "specifies whether to expect single, paired, or other configuration of reads. 
In the case of paired reads, information about the relative distance and orientation is specified.", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "SINGLE", + "PAIRED" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "insert_size", + "description": "Insert size for paired reads.", + "dataType": "String", + "required": false, + "ontology": null, + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "platform", + "description": "a parameter to report the sequencing instrument model and make", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "LS454", + "Illumina", + "HiSeq", + "NextSeq", + "PacBio", + "Themo Fisher Scientific", + "MGI Tech", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "instrument_model", + "description": "a parameter to report the sequencing instrument model and make", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "454 GS", + "454 GS 20", + "454 GS FLX", + "454 GS FLX+", + "454 GS FLX Titanium", + "454 GS Junior", + "HiSeq X Five", + "HiSeq X Ten", + "Illumina Genome Analyzer", + "Illumina Genome Analyzer II", + "Illumina Genome Analyzer IIx", + "Illumina HiScanSQ", + "Illumina HiSeq 1000", + "Illumina HiSeq 1500", + "Illumina HiSeq 2000", + "Illumina HiSeq 2500", + "Illumina HiSeq 3000", + "Illumina HiSeq 4000", + "Illumina iSeq 100", + "Illumina MiSeq", + "Illumina MiniSeq", + "Illumina NovaSeq 6000", + "NextSeq 500", + "NextSeq 550", + "PacBio RS", + "PacBio RS II", + "Sequel", + "Ion Torrent PGM", + "Ion Torrent Proton", + "Ion Torrent S5", + "Ion Torrent S5 XL", + "AB 3730xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 310 Genetic Analyzer", + "MinION", + "GridION", + "PromethION", + "BGISEQ-500", + 
"DNBSEQ-T7", + "DNBSEQ-G400", + "DNBSEQ-G50", + "DNBSEQ-G400 FAST", + "unspecified" + ], + "isaTag": "parameter_value" + }, + { + "iri": null, + "name": "accession", + "description": "ENA experiment accession code. Provided by ENA after submission.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "other_material_characteristic" + }, + { + "iri": null, + "name": "submission_date", + "description": "Date in which run was submitted to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "other_material_characteristic" + }, + { + "iri": null, + "name": "status", + "description": "Status of submission to ENA.", + "dataType": "Controlled Vocabulary", + "required": false, + "ontology": null, + "CVList": [ + "add", + "added", + "modify", + "modified", + "cancel", + "cancelled", + "released" + ], + "isaTag": "other_material_characteristic" + } + ] + }, + { + "metadata": { + "name": "nucleic acid sequencing ENA Upload Tool", + "group": "ena modified for upload tool", + "group_order": 5, + "temporary_name": "5_ena_custom_nucleic_acid_sequencing", + "version": "1.0.0", + "isa_config": "genome_seq_default_v2015-07-02", + "isa_measurement_type": "genome sequencing", + "isa_technology_type": "nucleotide sequencing", + "isa_protocol_type": "nucleic acid sequencing", + "repo_schema_id": "ENA General experiment information 2021", + "organism": "any", + "level": "assay - data file" + }, + "data": [ + { + "iri": null, + "name": "Input", + "description": "Existing Samples in DataHub that represent input for this protocol.", + "dataType": "Registered Sample List", + "required": true, + "isaTag": null + }, + { + "iri": null, + "name": "nucleic acid sequencing", + "description": "type of assay or experimental step performed.", + "required": true, + "ontology": null, + "dataType": "String", //need to be autofilled with name of selected SOP for that block + "CVList": null, //SOPtittle + 
"isaTag": "protocol" + }, + { + "iri": null, + "name": "Raw Data File", + "description": "Name (or URI) of the raw data file generated by an assay.", + "dataType": "String", + "title": true, + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "data_file" + }, + { + "iri": null, + "name": "file_name", + "description": "Name (or URI) of the raw data file generated by an assay.", + "dataType": "String", + "required": true, + "ontology": null, + "CVList": null, + "isaTag": "data_file_comment" + }, + { + "iri": null, + "name": "file_type", + "description": "The run data file model.", + "dataType": "Controlled Vocabulary", + "required": true, + "ontology": null, + "CVList": [ + "bam", + "cram", + "fastq", + "oxfordnanopore_native", + "pacbio_hdf5", + "sff" + ], + "isaTag": "data_file_comment" + }, + { + "iri": null, + "name": "file checksum", + "description": "The MD5 checksum of the file.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "data_file_comment" + }, + { + "iri": null, + "name": "accession", + "description": "ENA experiment accession code. Provided by ENA after submission.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "data_file_comment" + }, + { + "iri": null, + "name": "submission_date", + "description": "Date in which run was submitted to ENA.", + "dataType": "String", + "required": false, + "ontology": null, + "CVList": null, + "isaTag": "data_file_comment" + }, + { + "iri": null, + "name": "status", + "description": "Status of submission to ENA.", + "dataType": "Controlled Vocabulary", + "required": false, + "ontology": null, + "CVList": [ + "add", + "added", + "modify", + "modified", + "cancel", + "cancelled", + "released" + ], + "isaTag": "data_file_comment" + } + ] + } + ] +}