Skip to content

Commit

Permalink
Updated unit tests and fixed some bugs in the pam_generation script
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Jan 16, 2025
1 parent 2330596 commit c48e82e
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 50 deletions.
38 changes: 21 additions & 17 deletions src/PAModelpy/utils/pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,19 @@ def parse_gpr_information(gpr_info:str,
if genes is None: return gpr_list

#convert the genes to the associated proteins
enzyme_relations = []
if '_'in enzyme_id:
enzyme_relations = [enzyme_id.split('_')]
for sublist in gpr_list:
enz_sublist = []
for item in sublist:
if item in gene2protein.keys():
if '_' not in gene2protein[item]:
enz_sublist.append(gene2protein[item])
enzyme_relations += [enz_sublist]
elif gene2protein[item].split('_') not in enzyme_relations:
enzyme_relations += [gene2protein[item].split('_')]
enzyme_relations = _filter_sublists(enzyme_relations, enzyme_id.split('_'), how='all')
# enzyme_relations = []
# if '_'in enzyme_id:
enzyme_relations = [enzyme_id.split('_')]
# for sublist in gpr_list:
# enz_sublist = []
# for item in sublist:
# if item in gene2protein.keys():
# if '_' not in gene2protein[item]:
# enz_sublist.append(gene2protein[item])
# enzyme_relations += enz_sublist
# elif gene2protein[item].split('_') not in enzyme_relations:
# enzyme_relations += gene2protein[item].split('_')
# enzyme_relations = _filter_sublists(enzyme_relations, enzyme_id.split('_'), how='all')
return sorted(gpr_list), sorted(enzyme_relations)

def get_protein_gene_mapping(enzyme_db: pd.DataFrame, model) -> tuple[dict, dict]:
Expand Down Expand Up @@ -281,22 +281,26 @@ def _order_enzyme_complex_id(enz_id:str,
return "_".join(sorted(proteins))


def parse_reaction2protein(enzyme_db: pd.DataFrame, model: cobra.Model) -> dict:
def parse_reaction2protein(enzyme_db: pd.DataFrame,
model: cobra.Model,
other_enzyme_id_pattern: str = r'E[0-9][0-9]*') -> dict:
rxn_info2protein = {}
protein2gpr = defaultdict(list)
#remove copy number substrings from the reaction to make it matchable to enzyme information
filtered_model_reactions = [_extract_reaction_id(r.id) for r in model.reactions]

#make sure all enzyme complexes have an id ordered in a structured way
enzyme_db['enzyme_id'] = enzyme_db['enzyme_id'].map(_order_enzyme_complex_id, na_action='ignore')

# replace NaN values with unique identifiers
enzyme_db.loc[enzyme_db['enzyme_id'].isnull(), 'enzyme_id'] = [f'E{i}' for i in
range(enzyme_db['enzyme_id'].isnull().sum())]

enzyme_db.loc[enzyme_db['gene'].isnull(), 'gene'] = [[f'gene_{i}'] for i in
range(enzyme_db['gene'].isnull().sum())]


#make sure all enzyme complexes have an id ordered in a structured way
enzyme_db['enzyme_id'] = enzyme_db['enzyme_id'].apply(_order_enzyme_complex_id,
other_enzyme_id_pattern = other_enzyme_id_pattern)

protein2gene, gene2protein = _get_genes_for_proteins(enzyme_db, model)

# parse the information for all gene-protein-reaction relations in the dataframe
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import pickle
import os
from src.PAModelpy.configuration import Config
from src.PAModelpy.PAModel import PAModel

Expand All @@ -8,6 +9,7 @@
from Scripts.pam_generation_uniprot_id import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam,
parse_gpr_information_for_protein2genes,
parse_gpr_information_for_rxn2protein)
from src.PAModelpy.utils import set_up_pam


def test_gpr_information_is_parsed_correctly():
Expand Down Expand Up @@ -48,11 +50,13 @@ def test_gpr_information_for_protein_is_correctly_filtered():
def test_if_enzyme_complex_in_toy_pam_is_parsed_correctly():
sut = set_up_toy_pam_with_enzyme_complex(sensitivity=False)

assert all([enz in sut.enzymes for enz in ['E1', 'E2', 'E10', 'E2_E10']])
assert all([const not in sut.constraints.keys() for const in ['EC_E10_f', 'EC_E2_f']])
constraint = sut.constraints['EC_E2_E10_f'].get_linear_coefficients([sut.reactions.CE_R2_E2_E10.forward_variable])
assert constraint[sut.reactions.CE_R2_E2_E10.forward_variable] > 0
print(sut.enzymes, sut.enzyme_variables)

assert all([enz in sut.enzymes for enz in ['E1', 'E10_E2']])
assert all([const not in sut.constraints.keys() for const in ['EC_E10_f', 'EC_E2_f']])
constraint = sut.constraints['EC_E10_E2_f'].get_linear_coefficients([sut.reactions.CE_R2_E10_E2.forward_variable])
assert constraint[sut.reactions.CE_R2_E10_E2.forward_variable] > 0
#
def test_if_isozymes_in_toy_pam_are_parsed_correctly():
sut = set_up_toy_pam_with_isozymes(sensitivity=False)

Expand Down Expand Up @@ -102,26 +106,16 @@ def test_if_toy_pam_with_enzyme_comples_has_same_growth_rate_as_without():

assert sut.objective.value == pytest.approx(toy_pam.objective.value, abs = 1e-6)

def test_set_up_ecolicore_pam_works():
sut = set_up_ecolicore_pam()
sut.optimize()
assert True
def test_if_ecolicore_pam_optimizes():
sut = set_up_ecolicore_pam()
sut.optimize()
assert sut.objective.value > 0

def test_set_up_ecoli_pam_works():
sut = set_up_ecoli_pam()
assert True
def test_if_pamodel_can_be_pickled_and_unpickled():
# Arrange
pam_data_file = os.path.join('tests', 'data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx')
iml1515 = os.path.join('Models', 'iML1515.xml')
sut = set_up_pam(pam_data_file,
iml1515,
sensitivity=False,
adjust_reaction_ids=False)

def test_if_ecoli_pam_optimizes():
sut = set_up_ecoli_pam()
sut.optimize()
assert sut.objective.value > 0

def test_if_pamodel_can_be_pickled_and_unpickled():
sut = set_up_ecoli_pam(sensitivity=False)
sut.change_reaction_bounds('EX_glc__D_e', -10, 0)
sut.optimize()

Expand Down Expand Up @@ -151,9 +145,15 @@ def set_up_toy_pam_with_enzyme_complex(sensitivity =True):
Etot = 0.6*1e-3
model = build_toy_gem()
active_enzyme = build_active_enzyme_sector(config)

#add an enzyme associated to enzyme complex to the toy model
active_enzyme.rxn2protein['R2']['E2']['protein_reaction_association'] = [['E2', 'E10']]
active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy()
# active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy()
active_enzyme.rxn2protein['R2']['E2_E10'] = active_enzyme.rxn2protein['R2']['E2'].copy()
del active_enzyme.rxn2protein['R2']['E2']

active_enzyme.protein2gene['E2_E10'] = [['gene2', 'gene10']]


#build the toy model
unused_enzyme = build_unused_protein_sector(config)
Expand All @@ -180,6 +180,7 @@ def set_up_toy_pam_with_isozymes(sensitivity =True):
active_enzyme.rxn2protein['R2']['E2']['protein_reaction_association'] = [['E2'], ['E10']]
active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy()


#build the toy model
unused_enzyme = build_unused_protein_sector(config)
translation_enzyme = build_translational_protein_sector(config)
Expand Down Expand Up @@ -209,6 +210,9 @@ def set_up_toy_pam_with_isozymes_and_enzymecomplex(sensitivity =True):
active_enzyme.rxn2protein['R3']['E3']['protein_reaction_association'] = [['E3','E10', 'E11']]
active_enzyme.rxn2protein['R3']['E10']= active_enzyme.rxn2protein['R3']['E3'].copy()
active_enzyme.rxn2protein['R3']['E11']= active_enzyme.rxn2protein['R3']['E3'].copy()
active_enzyme.rxn2protein['R3']['E3_E10_E11']= active_enzyme.rxn2protein['R3']['E3'].copy()

active_enzyme.protein2gene['E3_E10_E11'] = [['gene3', 'gene10', 'gene11']]


#build the toy model
Expand Down
34 changes: 29 additions & 5 deletions tests/unit_tests/test_pamodel/test_pamodel.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pytest
from cobra.io import load_json_model
import os

from src.PAModelpy import PAModel,Config,ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector, CatalyticEvent
from Scripts.pam_generation_uniprot_id import set_up_ecoli_pam, set_up_ecolicore_pam
from tests.unit_tests.test_pamodel.test_pam_generation import set_up_toy_pam_with_isozymes_and_enzymecomplex
from tests.unit_tests.test_pamodel.test_pam_setup import set_up_toy_pam_with_isozymes_and_enzymecomplex
from src.PAModelpy.utils import set_up_pam

def test_if_pamodel_change_kcat_function_works():
#arrange
Expand Down Expand Up @@ -43,7 +44,7 @@ def test_if_pamodel_change_kcat_function_works_with_catalytic_reactions():
#arrange
sut = set_up_ecoli_pam(sensitivity=False)
input_kcat = 10
enzyme_id = 'P0ABJ1'
enzyme_id = 'P0ABI8_P0ABJ1_P0ABJ3_P0ABJ6'
rxn_id = "CYTBO3_4pp"
ce_rxn= sut.reactions.query(f'CE_{rxn_id}_{enzyme_id}')[0]
enzyme_complex_id = "_".join(ce_rxn.id.split("_")[3:])
Expand Down Expand Up @@ -173,6 +174,7 @@ def test_if_pamodel_sensitivity_can_be_changed_false_to_true():
def test_if_pamodel_sensitivity_can_be_changed_true_to_false_ecolicore():
# arrange
ecolicore_pam = set_up_ecolicore_pam(sensitivity=True)

glc_lb = -ecolicore_pam.constraints['EX_glc__D_e_lb'].ub
glc_ub = ecolicore_pam.constraints['EX_glc__D_e_ub'].ub

Expand Down Expand Up @@ -269,11 +271,13 @@ def test_if_pamodel_gets_catalyzing_enzymes_for_enzyme_object():
# Arrange
sut = set_up_toy_pam_with_isozymes_and_enzymecomplex(sensitivity = False)
enzyme_ut = 'E10'
associated_enzymes = ['E10', 'E3_E10_E11']
associated_enzymes = ['E10', 'E10_E11_E3']

# Apply
catalyzing_enzymes = sut._get_catalyzing_enzymes_for_enzyme(enzyme_ut)

print(catalyzing_enzymes)

# Assert
assert all(enz in catalyzing_enzymes for enz in associated_enzymes)

Expand Down Expand Up @@ -334,4 +338,24 @@ def assert_bounds(model_ori, model_copy):
def assert_total_protein_content(model_ori, model_copy):
assert model_ori.p_tot == model_copy.p_tot
tot_prot_cons_id = model_ori.TOTAL_PROTEIN_CONSTRAINT_ID
assert model_ori.constraints[tot_prot_cons_id].ub == model_copy.constraints[tot_prot_cons_id].ub
assert model_ori.constraints[tot_prot_cons_id].ub == model_copy.constraints[tot_prot_cons_id].ub

def set_up_ecoli_pam(sensitivity=True):
pam_data_file = os.path.join('tests', 'data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx')
iml1515 = os.path.join('Models', 'iML1515.xml')
return set_up_pam(pam_data_file,
iml1515,
sensitivity=sensitivity,
adjust_reaction_ids=False)

def set_up_ecolicore_pam(sensitivity=True):
pam_data_file = os.path.join('tests', 'data',
'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx')
ecolicore_gem = load_json_model(os.path.join('Models', 'e_coli_core.json'))

# Apply
return set_up_pam(pam_data_file,
ecolicore_gem,
total_protein=0.1699,
sensitivity=sensitivity,
adjust_reaction_ids=True)
14 changes: 9 additions & 5 deletions tests/unit_tests/test_utils/test_pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from Scripts.toy_ec_pam import build_toy_gem
from src.utils.pam_generation import parse_reaction2protein, set_up_pam
from src.PAModelpy.utils.pam_generation import parse_reaction2protein, set_up_pam

def test_if_rxn2protein_info_is_correctly_parsed():
# Arrange
Expand All @@ -20,6 +20,8 @@ def test_if_rxn2protein_info_is_correctly_parsed():
'direction':['f','f', 'f', 'f', 'b']
}
)
print(toy_enzyme_db)

toy_model = build_toy_gem()

expected_rxn2protein = {
Expand Down Expand Up @@ -49,15 +51,17 @@ def test_if_rxn2protein_info_is_correctly_parsed():
expected_protein2gpr = {'E1': [['gene1']], 'E2a': [['gene2a']], 'E2b_E2c': [['gene2b', 'gene2c']], 'E3': [['gene3']]}

# Apply
rxn2protein, protein2gpr = parse_reaction2protein(toy_enzyme_db, toy_model)
rxn2protein, protein2gpr = parse_reaction2protein(toy_enzyme_db,
toy_model,
other_enzyme_id_pattern = r'E[0-9][0-9]*[a-z]?')

# Assert
for output_dict, expected_dict in zip([rxn2protein, protein2gpr], [expected_rxn2protein, expected_protein2gpr]):
assert all([expected_dict[key] == value for key, value in output_dict.items()])

def test_if_set_up_pam_can_build_ecolicore_pam():
#Arrange
pam_data_file = os.path.join('Data', 'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx')
pam_data_file = os.path.join('tests','data', 'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx')
ecolicore_gem = cobra.io.load_json_model(os.path.join('Models', 'e_coli_core.json'))

#Apply
Expand All @@ -74,14 +78,14 @@ def test_if_set_up_pam_can_build_ecolicore_pam():

def test_if_set_up_pam_can_build_iML1515():
#Arrange
pam_data_file = os.path.join('Results', '1_preprocessing', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx')
pam_data_file = os.path.join('tests','data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx')
iml1515 = os.path.join('Models', 'iML1515.xml')

#Apply
pam = set_up_pam(pam_data_file,
iml1515,
sensitivity=False,
adjust_reaction_ids=True)
adjust_reaction_ids=False)

pam.optimize()

Expand Down

0 comments on commit c48e82e

Please sign in to comment.