Merge pull request #13 from vivarium-collective/api
feat: base-implementation and content for verification api
AlexPatrie committed May 17, 2024
2 parents 38db4a2 + b541d58 commit e1e52fa
Showing 42 changed files with 9,250 additions and 3,745 deletions.
11 changes: 7 additions & 4 deletions biosimulator_processes/__init__.py
@@ -14,14 +14,18 @@
PROCESSES_TO_REGISTER = [
('cobra', 'cobra_process.CobraProcess'),
('copasi', 'copasi_process.CopasiProcess'),
('_copasi', 'copasi_process._CopasiProcess'),
('smoldyn', 'smoldyn_process.SmoldynProcess'),
('tellurium', 'tellurium_process.TelluriumProcess'),
('amici', 'amici_process.AmiciProcess')]
('amici', 'amici_process.AmiciProcess'),
('compare_ode_process', 'comparator_process.ODEComparatorProcess')]

STEPS_TO_REGISTER = [
('get_sbml', 'get_sbml.GetSbml'),
('get_sbml_step', 'get_sbml.GetSbmlStep'),
('plotter', 'viz.CompositionPlotter'),
('plotter2d', 'viz.Plotter2d')]
('plotter2d', 'viz.Plotter2d'),
('compare_ode_step', 'comparator_step.ODEComparatorStep')]


# core process registry implementation (unique to this package)
CORE = ProcessTypes()
@@ -30,4 +34,3 @@
CORE.type_registry.register('sed_model', schema={'_type': MODEL_TYPE})
register_module(PROCESSES_TO_REGISTER, CORE)
register_module(STEPS_TO_REGISTER, CORE)
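
For reference, each tuple above pairs a registry key with a 'module.ClassName' path. register_module itself is not part of this diff, so the sketch below is only an assumed shape of such a helper (the package root and registry call are guesses):

import importlib

def register_module(items, core, package='biosimulator_processes.processes'):
    """Illustrative only: import each 'module.ClassName' path and register the class under its key."""
    for key, path in items:
        module_name, class_name = path.rsplit('.', 1)
        module = importlib.import_module(f'{package}.{module_name}')
        core.process_registry.register(key, getattr(module, class_name))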

18 changes: 18 additions & 0 deletions biosimulator_processes/data_model/__init__.py
@@ -7,6 +7,8 @@


from dataclasses import dataclass, asdict, field

import numpy as np
from pydantic import BaseModel, ConfigDict


@@ -18,3 +20,19 @@ def to_dict(self):

class _BaseModel(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)


class DescriptiveArray(np.ndarray):
"""Array with metadata."""

def __new__(cls, array, dtype=None, order=None, description: str = None, metadata: dict = None, **kwargs):
obj = np.asarray(array, dtype=dtype, order=order).view(cls)
metadata = metadata or kwargs
metadata['description'] = description
obj.metadata = metadata
return obj

def __array_finalize__(self, obj):
if obj is None:
return
self.metadata = getattr(obj, 'metadata', None)
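
A quick usage sketch of the subclassing pattern above (the metadata keys are arbitrary examples, and DescriptiveArray is assumed to be imported from biosimulator_processes.data_model):

arr = DescriptiveArray([0.1, 0.2, 0.3], description='floating species concentrations', units='mmol/L')
print(arr.metadata['description'])      # 'floating species concentrations'
print(arr.metadata['units'])            # 'mmol/L' (collected via **kwargs)
window = arr[:2]                        # slicing returns a view of the same class
print(window.metadata is arr.metadata)  # True: __array_finalize__ carries the metadata reference over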
232 changes: 187 additions & 45 deletions biosimulator_processes/data_model/compare_data_model.py
@@ -1,77 +1,219 @@
"""
Compare Data Model:
Objects whose purpose is to compare the output of 'like-minded' simulators/processes
which reside in a shared compositional space. The global 'state' of this composition
is agnostic to any summation of values.
Such engineering should be performed by an explorer of semantic unity who uses
vocabulary as their protection. The Explorer is exactly that: unafraid to step outside
the unifying 'glossary' in the name of expanding it. Semantics are of both great
use and immense terror to the Explorer, and the Explorer firmly understands and
accepts these facts.
author: Alex Patrie
license: Apache License, Version 2.0
date: 04/2024
"""


from typing import *
from abc import ABC
from dataclasses import dataclass
from datetime import datetime

import numpy as np
import pandas as pd
from process_bigraph import Composite, pf
from pydantic import Field, field_validator

from biosimulator_processes.utils import prepare_single_ode_process_document
from biosimulator_processes.data_model import _BaseModel as BaseModel, _BaseClass
from biosimulator_processes import CORE

from typing import List, Dict, Tuple, Any
from biosimulator_processes.data_model import _BaseModel as BaseModel

# TODO: Transpose data frame and make col vectors for each param, where the index is param name,
# and cols are simulator id.

class SimulatorComparisonResult(BaseModel):
simulators: List[str]
value: Dict[Tuple[str], Dict[str, Any]]

class ParamIntervalOutputData(BaseModel):
param_name: str
value: float


class ResultData(BaseModel):
name: str
value: Union[float, int, str]
mse: float
class IntervalOutput(BaseModel):
interval_id: float
data: Union[Dict[str, float], List[ParamIntervalOutputData]]


class IntervalResult(BaseModel):
global_time_stamp: float
# results: Dict[str, Any]
results: List[ResultData]
class ParameterScore(BaseModel):
"""Base class for parameter scores in-general."""
param_name: str
value: float


class RMSE(BaseModel):
class ParameterMSE(ParameterScore):
"""Attribute of Process Parameter RMSE"""
param_name: str
value: float = Field(...) # TODO: Ensure field validation/setting for MSE-specific calculation.
mean: float
process_id: str

    @field_validator('value')
    @classmethod
def set_value(cls, v):
# TODO: Finish this.
return v


class ProcessParameterRMSE(BaseModel):
"""Attribute of Process Fitness Score"""
process_id: str
param_id: str # mostly species names or something like that
value: float # use calculate rmse here


class ProcessFitnessScore(BaseModel):
"""Attribute of Simulator Process Output Based on the list of interval results"""
process_id: str
error: float # the error by which to bias the rmse calculation
rmse_values: List[ProcessParameterRMSE] # indexed by parameter name over whole simulation


class IntervalOutputData(BaseModel):
"""Attribute of Simulator Process Output"""
param_name: str # data name
value: float
time_id: float # index for composite run inference
mse: ParameterMSE


class SimulatorResult(BaseModel):
class SimulatorProcessOutput(BaseModel):
"""Attribute of Process Comparison Result"""
process_id: str
simulator: str
data: List[IntervalResult]
rmse: List[RMSE]
data: List[IntervalOutputData]
fitness_score: ProcessFitnessScore


class ComparisonResults(BaseModel):
class ProcessComparisonResult(BaseModel):
"""Generic class inherited for all process comparisons."""
duration: int
num_steps: int
simulators: List[str]
outputs: List[SimulatorProcessOutput]
    # default_factory so each comparison result gets its own timestamp rather than one fixed at import time
    timestamp: str = Field(
        default_factory=lambda: str(datetime.now()).replace(' ', '_').replace(':', '-').replace('.', '-'))
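
For illustration, the models above nest roughly as follows (all values are made up):

mse = ParameterMSE(param_name='T', value=0.02, mean=2.9, process_id='copasi_0')
point = IntervalOutputData(param_name='T', value=2.9, time_id=0.0, mse=mse)
rmse = ProcessParameterRMSE(process_id='copasi_0', param_id='T', value=0.14)
fitness = ProcessFitnessScore(process_id='copasi_0', error=0.0, rmse_values=[rmse])
sim_output = SimulatorProcessOutput(
    process_id='copasi_0', simulator='copasi', data=[point], fitness_score=fitness)
comparison = ProcessComparisonResult(
    duration=10, num_steps=100, simulators=['copasi', 'tellurium', 'amici'], outputs=[sim_output])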


# DATA MODEL USED
@dataclass
class ODEProcessIntervalComparison(_BaseClass):
mse_data: pd.DataFrame
rmse_data: pd.DataFrame
inner_prod_data: pd.DataFrame
outer_prod_data: Dict
time_id: int


@dataclass
class ODEIntervalResult(_BaseClass):
interval_id: float
copasi_floating_species_concentrations: Dict[str, float]
tellurium_floating_species_concentrations: Dict[str, float]
amici_floating_species_concentrations: Dict[str, float]
time: float


@dataclass
class ODEComparisonResult(_BaseClass):
duration: int
num_steps: int
biomodel_id: str
outputs: List[SimulatorResult]
timestamp: str
outputs: Optional[List[ODEIntervalResult]] = None

def __init__(self, duration, num_steps, biomodel_id):
super().__init__()
self.duration = duration
self.num_steps = num_steps
self.biomodel_id = biomodel_id
self.outputs = self._set_outputs()
self.timestamp = self._set_timestamp()

@classmethod
def _set_timestamp(cls):
return str(datetime.now()).replace(' ', '_').replace(':', '-').replace('.', '-')

"""
def _set_outputs(self):
return self.generate_ode_interval_outputs(
self.duration,
self.num_steps,
self.biomodel_id)

For example:
result = ComparisonResults(
10,
20,
'BIOMD0000023',
[
SimulatorResult(
my_process,
copasi,
[
IntervalResult(
0,
[
ResultData('T', 2.9, 0.7)
]
]
)
]
def generate_ode_interval_outputs(self, duration: int, n_steps: int, biomodel_id: str) -> List[ODEIntervalResult]:
def _generate_ode_interval_results(duration: int, n_steps: int, biomodel_id: str) -> List[ODEIntervalResult]:
results_dict = self.generate_ode_comparison(biomodel_id, duration)
simulator_names = ['copasi', 'tellurium', 'amici']
interval_results = []

"""
for global_time_index, interval_result_data in enumerate(results_dict['outputs']):
interval_config = {
'interval_id': float(global_time_index),
'time': interval_result_data['time']
}

for k, v in interval_result_data.items():
for simulator_name in simulator_names:
if simulator_name in k:
interval_config[f'{simulator_name}_floating_species_concentrations'] = v

interval_result = ODEIntervalResult(**interval_config)
interval_results.append(interval_result)

return interval_results

return _generate_ode_interval_results(duration, n_steps, biomodel_id)

@classmethod
def generate_ode_comparison(cls, biomodel_id: str, dur: int) -> Dict:
"""Run the `compare_ode_step` composite and return data which encapsulates another composite
workflow specified by dir.
Args:
biomodel_id:`str`: A Valid Biomodel ID.
dur:`int`: duration of the internal composite simulation.
Returns:
`Dict` of simulation comparison results like `{'outputs': {...etc}}`
"""
compare = {
'compare_ode': {
'_type': 'step',
'address': 'local:compare_ode_step',
'config': {'biomodel_id': biomodel_id, 'duration': dur},
'inputs': {},
'outputs': {'comparison_data': ['comparison_store']}
},
'verification_data': {
'_type': 'step',
'address': 'local:ram-emitter',
'config': {
'emit': {'comparison_data': 'tree[any]'}
},
'inputs': {'comparison_data': ['comparison_store']}
}
}

wf = Composite(config={'state': compare}, core=CORE)
wf.run(1)
comparison_results = wf.gather_results()
output = comparison_results[("verification_data"),][0]['comparison_data']

class ProcessAttributes(BaseModel):
name: str
initial_state: Dict[str, Any]
inputs: Dict[str, Any]
outputs: Dict[str, Any]
return {'outputs': output[('emitter',)]}


class CompositeRunError(BaseModel):
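
As a usage sketch (not part of the diff), the ODE comparison output can be tabulated the way the TODO near the top of this file suggests: one column per simulator, indexed by species/parameter name. The field names follow ODEIntervalResult above; the BioModels ID is a placeholder:

import pandas as pd

result = ODEComparisonResult(duration=10, num_steps=100, biomodel_id='BIOMD0000000012')
interval_frames = [
    pd.DataFrame({
        'copasi': interval.copasi_floating_species_concentrations,
        'tellurium': interval.tellurium_floating_species_concentrations,
        'amici': interval.amici_floating_species_concentrations,
    })
    for interval in (result.outputs or [])
]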
26 changes: 26 additions & 0 deletions biosimulator_processes/io.py
@@ -0,0 +1,26 @@
from typing import *
from tempfile import mkdtemp
import requests
import zipfile as zf
import os


def fetch_sbml_file(biomodel_id: str, save_dir: Optional[str] = None) -> str:
url = f'https://www.ebi.ac.uk/biomodels/search/download?models={biomodel_id}'
headers = {'accept': '*/*'}
response = requests.get(url, headers=headers)
model_filename = f'{biomodel_id}.xml'
dirpath = save_dir or mkdtemp() # os.getcwd()
response_zip_fp = os.path.join(dirpath, 'results.zip')
if not os.path.exists(response_zip_fp):
try:
with open(response_zip_fp, 'wb') as f:
f.write(response.content)

with zf.ZipFile(response_zip_fp, 'r') as zipRef:
zipRef.extract(model_filename, dirpath)

            os.remove(response_zip_fp)
        except Exception as e:
            print(e)

    # return the extracted model path whether it was just downloaded or already present in save_dir
    return os.path.join(dirpath, model_filename)
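
Usage sketch for the helper above (the BioModels ID is a placeholder):

from biosimulator_processes.io import fetch_sbml_file

# downloads the BioModels zip, extracts '<biomodel_id>.xml' into a temp dir, and returns its path
sbml_path = fetch_sbml_file('BIOMD0000000012')
print(sbml_path)  # e.g. /tmp/tmpabc123/BIOMD0000000012.xml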
