Skip to content

Commit

Permalink
chore!: rename transform class and data dir to transformers
Browse files Browse the repository at this point in the history
close #393
  • Loading branch information
korikuzma committed Oct 30, 2024
1 parent c9cffe1 commit 7a9bbbc
Show file tree
Hide file tree
Showing 20 changed files with 82 additions and 80 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.civic
======================
metakb.transformers.base
========================

.. automodule:: metakb.transform.civic
.. automodule:: metakb.transformers.base
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.moa
====================
metakb.transformers.civic
=========================

.. automodule:: metakb.transform.moa
.. automodule:: metakb.transformers.civic
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.base
=====================
metakb.transformers.moa
=======================

.. automodule:: metakb.transform.base
.. automodule:: metakb.transformers.moa
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
6 changes: 3 additions & 3 deletions docs/source/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ Transformers
:toctree: api/
:template: module_summary.rst

metakb.transform.base
metakb.transform.civic
metakb.transform.moa
metakb.transformers.base
metakb.transformers.civic
metakb.transformers.moa
24 changes: 13 additions & 11 deletions src/metakb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)
from metakb.normalizers import check_normalizers as check_normalizer_health
from metakb.schemas.app import SourceName
from metakb.transform import CivicTransform, MoaTransform
from metakb.transformers import CivicTransformer, MoaTransformer

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -451,12 +451,12 @@ def load_cdm(

for src in sorted([s.value for s in SourceName]):
pattern = f"{src}_cdm_{version}.json"
globbed = (APP_ROOT / "data" / src / "transform").glob(pattern)
globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern)

try:
path = sorted(globbed)[-1]
except IndexError as e:
msg = f"No valid transform file found matching pattern: {pattern}"
msg = f"No valid transformation file found matching pattern: {pattern}"
raise FileNotFoundError(msg) from e

load_from_json(path, driver)
Expand Down Expand Up @@ -534,12 +534,12 @@ async def update(
sources = tuple(SourceName)
for src in sorted([s.value for s in sources]):
pattern = f"{src}_cdm_*.json"
globbed = (APP_ROOT / "data" / src / "transform").glob(pattern)
globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern)

try:
path = sorted(globbed)[-1]
except IndexError as e:
msg = f"No valid transform file found matching pattern: {pattern}"
msg = f"No valid transformation files found matching pattern: {pattern}"
raise FileNotFoundError(msg) from e

load_from_json(path, driver)
Expand Down Expand Up @@ -621,19 +621,21 @@ async def _transform_source(
:param output_directory: custom directory to store output to -- use source defaults
if not given
"""
transform_sources = {
SourceName.CIVIC: CivicTransform,
SourceName.MOA: MoaTransform,
transformer_sources = {
SourceName.CIVIC: CivicTransformer,
SourceName.MOA: MoaTransformer,
}
_echo_info(f"Transforming {source.as_print_case()}...")
start = timer()
transformer: CivicTransform | MoaTransform = transform_sources[source](
transformer: CivicTransformer | MoaTransformer = transformer_sources[source](
normalizers=normalizer_handler, harvester_path=harvest_file
)
harvested_data = transformer.extract_harvested_data()
await transformer.transform(harvested_data)
end = timer()
_echo_info(f"{source.as_print_case()} transform finished in {(end - start):.2f} s.")
_echo_info(
f"{source.as_print_case()} transformation finished in {(end - start):.2f} s."
)
output_file = (
output_directory / f"{source.value}_cdm_{_current_date_string()}.json"
if output_directory
Expand Down Expand Up @@ -715,7 +717,7 @@ def _retrieve_s3_cdms() -> str:
with tmp_path.open("wb") as f:
file.Object().download_fileobj(f)

cdm_dir = APP_ROOT / "data" / source / "transform"
cdm_dir = APP_ROOT / "data" / source / "transformers"
cdm_zip = ZipFile(tmp_path, "r")
cdm_zip.extract(f"{source}_cdm_{newest_version}.json", cdm_dir)

Expand Down
4 changes: 0 additions & 4 deletions src/metakb/transform/__init__.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Transformations
We take the harvested JSON from each source and transform this to our common data model.
We take the harvested JSON from each source and transform this to our common data model.


### Using the transformation modules
The VICC normalizers must first be installed.
The VICC normalizers must first be installed.

```
pip install thera-py
Expand Down Expand Up @@ -34,4 +34,4 @@ python3 -m gene.cli --normalizer="hgnc"
[disease-normalizer](https://github.com/cancervariants/disease-normalization)
```
python3 -m disease.cli --update_all --update_merged
```
```
4 changes: 4 additions & 0 deletions src/metakb/transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Transformations for sources."""

from .civic import CivicTransformer # noqa: F401
from .moa import MoaTransformer # noqa: F401
18 changes: 9 additions & 9 deletions src/metakb/transform/base.py → src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""A module for the Transform base class."""
"""A module for the Transformer base class."""

import datetime
import json
Expand Down Expand Up @@ -111,7 +111,7 @@ class TransformedData(BaseModel):
documents: list[Document] = []


class Transform(ABC):
class Transformer(ABC):
"""A base class for transforming harvester data."""

_methods: ClassVar[list[Method]] = [
Expand Down Expand Up @@ -234,13 +234,13 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize Transform base class.
"""Initialize Transformer base class.
:param Path data_dir: Path to source data directory
:param Optional[Path] harvester_path: Path to previously harvested data
:param ViccNormalizers normalizers: normalizer collection instance
"""
self.name = self.__class__.__name__.lower().split("transform")[0]
self.name = self.__class__.__name__.lower().split("transformer")[0]
self.data_dir = data_dir / self.name
self.harvester_path = harvester_path

Expand Down Expand Up @@ -362,7 +362,7 @@ def _get_combination_therapy(
Combination Therapy
"""
components = []
source_name = type(self).__name__.lower().replace("transform", "")
source_name = type(self).__name__.lower().replace("transformer", "")

for therapy in therapies:
if source_name == SourceName.MOA:
Expand Down Expand Up @@ -516,15 +516,15 @@ def create_json(self, cdm_filepath: Path | None = None) -> None:
:param cdm_filepath: Path to the JSON file where the CDM data will be
stored. If not provided, will use the default path of
``<APP_ROOT>/data/<src_name>/transform/<src_name>_cdm_YYYYMMDD.json``
``<APP_ROOT>/data/<src_name>/transformers/<src_name>_cdm_YYYYMMDD.json``
"""
if not cdm_filepath:
transform_dir = self.data_dir / "transform"
transform_dir.mkdir(exist_ok=True, parents=True)
transformers_dir = self.data_dir / "transformers"
transformers_dir.mkdir(exist_ok=True, parents=True)
today = datetime.datetime.strftime(
datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT
)
cdm_filepath = transform_dir / f"{self.name}_cdm_{today}.json"
cdm_filepath = transformers_dir / f"{self.name}_cdm_{today}.json"

with cdm_filepath.open("w+") as f:
json.dump(self.processed_data.model_dump(exclude_none=True), f, indent=2)
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
VariantTherapeuticResponseStudyPredicate,
_VariantOncogenicityStudyQualifier,
)
from metakb.transform.base import (
from metakb.transformers.base import (
CivicEvidenceLevel,
MethodId,
TherapeuticProcedureType,
Transform,
Transformer,
)

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,7 +99,7 @@ class SourcePrefix(str, Enum):
ASH = "ASH"


class CivicTransform(Transform):
class CivicTransformer(Transformer):
"""A class for transforming CIViC to the common data model."""

def __init__(
Expand All @@ -108,7 +108,7 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize CIViC Transform class.
"""Initialize CIViC Transformer class.
:param data_dir: Path to source data directory
:param harvester_path: Path to previously harvested CIViC data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@
VariantTherapeuticResponseStudyPredicate,
_VariantOncogenicityStudyQualifier,
)
from metakb.transform.base import (
from metakb.transformers.base import (
MethodId,
MoaEvidenceLevel,
TherapeuticProcedureType,
Transform,
Transformer,
)

logger = logging.getLogger(__name__)


class MoaTransform(Transform):
class MoaTransformer(Transformer):
"""A class for transforming MOA resources to common data model."""

def __init__(
Expand All @@ -44,7 +44,7 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize MOAlmanac Transform class.
"""Initialize MOAlmanac Transformer class.
:param data_dir: Path to source data directory
:param harvester_path: Path to previously harvested MOA data
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

TEST_DATA_DIR = Path(__file__).resolve().parents[0] / "data"
TEST_HARVESTERS_DIR = TEST_DATA_DIR / "harvesters"
TEST_TRANSFORM_DIR = TEST_DATA_DIR / "transform"
TEST_TRANSFORMERS_DIR = TEST_DATA_DIR / "transformers"


def pytest_addoption(parser):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,4 +433,4 @@
]
}
]
}
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "diagnostic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "diagnostic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
await c.transform()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "prognostic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "prognostic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
await c.transform()
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_civic_cdm(
check_method,
check_transformed_cdm,
):
"""Test that civic transform works correctly."""
"""Test that civic transformation works correctly."""
check_transformed_cdm(
data,
statements,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "therapeutic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
harvested_data = c.extract_harvested_data()
Expand All @@ -33,5 +33,5 @@ def studies(civic_eid2997_study, civic_eid816_study, civic_eid9851_study):


def test_civic_cdm(data, studies, check_transformed_cdm):
"""Test that civic transform works correctly."""
"""Test that civic transformation works correctly."""
check_transformed_cdm(data, studies, DATA_DIR / FILENAME)
Loading

0 comments on commit 7a9bbbc

Please sign in to comment.