diff --git a/cobra/io/__init__.py b/cobra/io/__init__.py index ca889ceda..5ac69fe57 100644 --- a/cobra/io/__init__.py +++ b/cobra/io/__init__.py @@ -5,6 +5,5 @@ from cobra.io.dict import model_from_dict, model_to_dict from cobra.io.json import from_json, load_json_model, save_json_model, to_json from cobra.io.mat import load_matlab_model, save_matlab_model -from cobra.io.sbml import read_sbml_model, write_sbml_model, \ - validate_sbml_model +from cobra.io.sbml import read_sbml_model, validate_sbml_model, write_sbml_model from cobra.io.yaml import from_yaml, load_yaml_model, save_yaml_model, to_yaml diff --git a/cobra/io/dict.py b/cobra/io/dict.py index 47a7c0e3f..d43174463 100644 --- a/cobra/io/dict.py +++ b/cobra/io/dict.py @@ -2,13 +2,14 @@ from __future__ import absolute_import -from collections import OrderedDict +from collections import OrderedDict, defaultdict from operator import attrgetter, itemgetter from numpy import bool_, float_ from six import iteritems, string_types from cobra.core import Gene, Metabolite, Model, Reaction +from cobra.io.sbml import parse_annotation_info from cobra.util.solver import set_objective @@ -53,6 +54,31 @@ } +def _fix_annotation(annotation): + # If the annotation is given as a list of lists, convert it to the + # expected dict format first. + if isinstance(annotation, list): + dict_anno = defaultdict(list) + for item in annotation: + data = parse_annotation_info(item[1]) + if data is None: + continue + else: + provider, identifier = data + + dict_anno[provider].append(identifier) + + annotation = dict_anno + + # Convert single annotation values given as plain strings (except + # "sbo") into lists so that the format is consistent. + for key in annotation.keys(): + if isinstance(annotation[key], string_types) and key != "sbo": + annotation[key] = [annotation[key]] + + return annotation + + def _fix_type(value): """convert possible types to str, float, and bool""" # Because numpy floats can not be pickled to json @@ -82,6 +108,8 
@@ def _update_optional(cobra_object, new_dict, optional_attribute_dict, value = getattr(cobra_object, key) if value is None or value == default: continue + if key == "annotation": + _fix_annotation(value) new_dict[key] = _fix_type(value) @@ -97,6 +125,8 @@ def metabolite_to_dict(metabolite): def metabolite_from_dict(metabolite): new_metabolite = Metabolite() for k, v in iteritems(metabolite): + if k == "annotation": + v = _fix_annotation(v) setattr(new_metabolite, k, v) return new_metabolite @@ -113,6 +143,8 @@ def gene_to_dict(gene): def gene_from_dict(gene): new_gene = Gene(gene["id"]) for k, v in iteritems(gene): + if k == "annotation": + v = _fix_annotation(v) setattr(new_gene, k, v) return new_gene @@ -142,6 +174,8 @@ def reaction_from_dict(reaction, model): (model.metabolites.get_by_id(str(met)), coeff) for met, coeff in iteritems(v))) else: + if k == "annotation": + v = _fix_annotation(v) setattr(new_reaction, k, v) return new_reaction @@ -225,5 +259,7 @@ def model_from_dict(obj): set_objective(model, coefficients) for k, v in iteritems(obj): if k in {'id', 'name', 'notes', 'compartments', 'annotation'}: + if k == "annotation": + v = _fix_annotation(v) setattr(model, k, v) return model diff --git a/cobra/io/sbml.py b/cobra/io/sbml.py index 5ccc3c7d7..d48075ee1 100644 --- a/cobra/io/sbml.py +++ b/cobra/io/sbml.py @@ -1427,26 +1427,20 @@ def _parse_annotations(sbase): # FIXME: read and store the qualifier uri = cvterm.getResourceURI(k) - data = _parse_annotation_info(uri) + data = parse_annotation_info(uri) if data is None: continue else: provider, identifier = data - if provider in annotation: - if isinstance(annotation[provider], string_types): - annotation[provider] = [annotation[provider]] - # FIXME: use a list - if identifier not in annotation[provider]: - annotation[provider].append(identifier) - else: - # FIXME: always in list - annotation[provider] = identifier + if provider not in annotation: + annotation[provider] = [] + 
annotation[provider].append(identifier) return annotation -def _parse_annotation_info(uri): +def parse_annotation_info(uri): """Parses provider and term from given identifiers annotation uri. Parameters diff --git a/cobra/test/data/invalid_annotation_format.json b/cobra/test/data/invalid_annotation_format.json index 96855b863..a17edbcee 100644 --- a/cobra/test/data/invalid_annotation_format.json +++ b/cobra/test/data/invalid_annotation_format.json @@ -12,6 +12,10 @@ [ "CHEBI", "http://identifiers.org/chebi/CHEBI:11981" + ], + [ + "CHEBI", + "http://identifiers.org/chebi/CHEBI:17847" ] ] } diff --git a/cobra/test/data/mini.pickle b/cobra/test/data/mini.pickle index 2942d27db..78645fbdd 100644 Binary files a/cobra/test/data/mini.pickle and b/cobra/test/data/mini.pickle differ diff --git a/cobra/test/test_io/test_annotation.py b/cobra/test/test_io/test_annotation.py index ae69f3ab8..cbfcb86bf 100644 --- a/cobra/test/test_io/test_annotation.py +++ b/cobra/test/test_io/test_annotation.py @@ -15,9 +15,9 @@ def _check_sbml_annotations(model): assert len(annotation) == 3 for key in ["bigg.model", "doi", "taxonomy"]: assert key in annotation - assert annotation["bigg.model"] == "e_coli_core" - assert annotation["doi"] == "10.1128/ecosalplus.10.2.1" - assert annotation["taxonomy"] == "511145" + assert annotation["bigg.model"] == ["e_coli_core"] + assert annotation["doi"] == ["10.1128/ecosalplus.10.2.1"] + assert annotation["taxonomy"] == ["511145"] # gene annotation # {'asap': 'ABE-0006162', 'ncbigene': '946368', 'uniprot': 'P33221', @@ -26,11 +26,11 @@ def _check_sbml_annotations(model): assert len(annotation) == 5 for key in ["asap", "ncbigene", "uniprot", "ncbigi", "ecogene"]: assert key in annotation - assert annotation["asap"] == "ABE-0006162" - assert annotation["ncbigene"] == "946368" - assert annotation["uniprot"] == "P33221" - assert annotation["ncbigi"] == "gi:16129802" - assert annotation["ecogene"] == "EG11809" + assert annotation["asap"] == ["ABE-0006162"] + 
assert annotation["ncbigene"] == ["946368"] + assert annotation["uniprot"] == ["P33221"] + assert annotation["ncbigi"] == ["gi:16129802"] + assert annotation["ecogene"] == ["EG11809"] # compartment annotation # FIXME: add tests with first class compartment model @@ -53,7 +53,7 @@ def _check_sbml_annotations(model): "kegg.compound", "seed.compound", "hmdb", "biocyc"]: assert key in annotation assert annotation[ - "inchi"] == "InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1" # noqa: E501 + "inchi"] == ["InChI=1S/C3H8O2/c1-3(5)2-4/h3-5H,2H2,1H3/t3-/m0/s1"] # noqa: E501 # reaction annotation # {'kegg.reaction': 'R00228', 'sbo': 'SBO:0000375', @@ -64,7 +64,7 @@ def _check_sbml_annotations(model): for key in ["kegg.reaction", "sbo", "ec-code", "rhea", "metanetx.reaction", "bigg.reaction", "biocyc"]: assert key in annotation - assert annotation["biocyc"] == 'META:ACETALD-DEHYDROG-RXN' + assert annotation["biocyc"] == ['META:ACETALD-DEHYDROG-RXN'] def test_read_sbml_annotations(data_directory): diff --git a/cobra/test/test_io/test_annotation_format.py b/cobra/test/test_io/test_annotation_format.py index 1b5fea619..a79ff5d3f 100644 --- a/cobra/test/test_io/test_annotation_format.py +++ b/cobra/test/test_io/test_annotation_format.py @@ -21,7 +21,11 @@ def test_load_json_model_valid(data_directory, tmp_path): def test_load_json_model_invalid(data_directory): - """Test that loading an invalid annotation from JSON raises TypeError""" + """Test loading an annotation in the form of list of list""" path = join(data_directory, "invalid_annotation_format.json") - with pytest.raises(TypeError): - model = load_json_model(path) + expected = { + 'kegg.compound': ['C01468'], + 'chebi': ['CHEBI:11981', 'CHEBI:17847'] + } + model = load_json_model(path) + assert model.metabolites[0].annotation == expected diff --git a/cobra/test/test_io/test_json.py b/cobra/test/test_io/test_json.py index 5b157232e..0d4dd575a 100644 --- a/cobra/test/test_io/test_json.py +++ 
b/cobra/test/test_io/test_json.py @@ -39,3 +39,20 @@ def test_save_json_model(tmpdir, mini_model): with open(output_file, "r") as infile: loaded = json.load(infile) assert jsonschema.validate(loaded, cio.json.json_schema) + + +def test_consistent_annotation_values(data_directory): + """Test if annotation are consistently represented as list""" + model = cio.read_sbml_model(join(data_directory, "mini_fbc2.xml")) + # annotation of genes + for gene in model.genes: + for key in list(gene.annotation.keys()): + assert isinstance(gene.annotation[key], list) + # annotation of metabolites + for metabolite in model.metabolites: + for key in list(metabolite.annotation.keys()): + assert isinstance(metabolite.annotation[key], list) + # annotation of reaction + for reaction in model.genes: + for key in list(reaction.annotation.keys()): + assert isinstance(reaction.annotation[key], list) diff --git a/cobra/test/test_io/test_sbml.py b/cobra/test/test_io/test_sbml.py index 602a8308e..8d9e87b69 100644 --- a/cobra/test/test_io/test_sbml.py +++ b/cobra/test/test_io/test_sbml.py @@ -360,7 +360,7 @@ def test_gprs(data_directory, tmp_path): def test_identifiers_annotation(): - from cobra.io.sbml import _parse_annotation_info + from cobra.io.sbml import parse_annotation_info for uri in [ "http://identifiers.org/chebi/CHEBI:000123", @@ -368,7 +368,7 @@ def test_identifiers_annotation(): "http://identifiers.org/CHEBI:000123", "https://identifiers.org/CHEBI:000123", ]: - data = _parse_annotation_info(uri) + data = parse_annotation_info(uri) assert data assert data[0] == "chebi" assert data[1] == "CHEBI:000123" @@ -379,7 +379,7 @@ def test_identifiers_annotation(): "http://identifiers.org/taxonomy:9602", "https://identifiers.org/taxonomy:9602", ]: - data = _parse_annotation_info(uri) + data = parse_annotation_info(uri) assert data assert data[0] == "taxonomy" assert data[1] == "9602" @@ -388,7 +388,7 @@ def test_identifiers_annotation(): "http://identifier.org/taxonomy/9602", 
"https://test.com", ]: - data = _parse_annotation_info(uri) + data = parse_annotation_info(uri) assert data is None @@ -414,39 +414,39 @@ def test_smbl_with_notes(data_directory, tmp_path): } metabolite_annotations = { '2hb_e': {'sbo': 'SBO:0000247', - 'inchi': 'InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,' - '(H,6,7)', - 'chebi': 'CHEBI:1148'}, + 'inchi': ['InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3,' + '(H,6,7)'], + 'chebi': ['CHEBI:1148']}, 'nad_e': {'sbo': 'SBO:0000247', - 'inchi': 'InChI=1S/C21H27N7O14P2/c22-17-12-19(' - '25-7-24-17)28(8-26-12)21-16(32)14(30)11(' - '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(' - '31)20(40-10)27-3-1-2-9(4-27)18(' - '23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,' - '(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,' - '11-,13-,14-,15-,16-,20-,21-/m1/s1', - 'chebi': 'CHEBI:57540'}, - 'h_e': {'sbo': 'SBO:0000247', 'inchi': 'InChI=1S/p+1/i/hH', - 'chebi': 'CHEBI:24636'}, + 'inchi': ['InChI=1S/C21H27N7O14P2/c22-17-12-19(' + '25-7-24-17)28(8-26-12)21-16(32)14(30)11(' + '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(' + '31)20(40-10)27-3-1-2-9(4-27)18(' + '23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2,' + '(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-,' + '11-,13-,14-,15-,16-,20-,21-/m1/s1'], + 'chebi': ['CHEBI:57540']}, + 'h_e': {'sbo': 'SBO:0000247', 'inchi': ['InChI=1S/p+1/i/hH'], + 'chebi': ['CHEBI:24636']}, '2obut_e': {'sbo': 'SBO:0000247', - 'inchi': 'InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,' - '7)/p-1', - 'chebi': 'CHEBI:16763'}, + 'inchi': ['InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6,' + '7)/p-1'], + 'chebi': ['CHEBI:16763']}, 'nadh_e': {'sbo': 'SBO:0000247', - 'inchi': 'InChI=1S/C21H29N7O14P2/c22-17-12-19(' - '25-7-24-17)28(8-26-12)21-16(32)14(30)11(' - '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(' - '29)15(31)20(40-10)27-3-1-2-9(4-27)18(' - '23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,' - '5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,' - '25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1', - 'chebi': 'CHEBI:57945'} + 
'inchi': ['InChI=1S/C21H29N7O14P2/c22-17-12-19(' + '25-7-24-17)28(8-26-12)21-16(32)14(30)11(' + '41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(' + '29)15(31)20(40-10)27-3-1-2-9(4-27)18(' + '23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2,' + '5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24,' + '25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1'], + 'chebi': ['CHEBI:57945']} } reaction_notes = {'CONFIDENCE_LEVEL': '4', 'NOTES': 'NCD', 'SUBSYSTEM': 'Propanoate metabolism', 'GENE_ASSOCIATION': '(HGNC:8546 and HGNC:8548) or' ' (HGNC:8547 and HGNC:8548)'} - reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': '1.1.1.27', + reaction_annotations = {'sbo': 'SBO:0000176', 'ec-code': ['1.1.1.27'], 'pubmed': ['10108', '21765']} for met_id in metabolite_notes: