diff --git a/cg/cli/workflow/nallo/base.py b/cg/cli/workflow/nallo/base.py index 39c27232a0..794bd8f700 100644 --- a/cg/cli/workflow/nallo/base.py +++ b/cg/cli/workflow/nallo/base.py @@ -5,6 +5,9 @@ import rich_click as click from cg.cli.utils import CLICK_CONTEXT_SETTINGS + +from cg.cli.workflow.nf_analysis import config_case + from cg.constants.constants import MetaApis from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.nallo import NalloAnalysisAPI @@ -18,3 +21,6 @@ def nallo(context: click.Context) -> None: """GMS/Nallo analysis workflow.""" AnalysisAPI.get_help(context) context.obj.meta_apis[MetaApis.ANALYSIS_API] = NalloAnalysisAPI(config=context.obj) + + +nallo.add_command(config_case) diff --git a/cg/meta/workflow/nallo.py b/cg/meta/workflow/nallo.py index ede48ea93e..f53b431b04 100644 --- a/cg/meta/workflow/nallo.py +++ b/cg/meta/workflow/nallo.py @@ -2,8 +2,12 @@ import logging from cg.constants import Workflow +from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex from cg.meta.workflow.nf_analysis import NfAnalysisAPI from cg.models.cg_config import CGConfig +from cg.models.nallo.nallo import NalloSampleSheetHeaders, NalloSampleSheetEntry, NalloParameters +from cg.store.models import CaseSample +from pathlib import Path LOG = logging.getLogger(__name__) @@ -18,3 +22,72 @@ def __init__( workflow: Workflow = Workflow.NALLO, ): super().__init__(config=config, workflow=workflow) + self.root_dir: str = config.nallo.root + self.workflow_bin_path: str = config.nallo.workflow_bin_path + self.profile: str = config.nallo.profile + self.conda_env: str = config.nallo.conda_env + self.conda_binary: str = config.nallo.conda_binary + self.platform: str = config.nallo.platform + self.params: str = config.nallo.params + self.workflow_config_path: str = config.nallo.config + self.resources: str = config.nallo.resources + self.tower_binary_path: str = config.tower_binary_path + self.tower_workflow: str = config.nallo.tower_workflow + self.account: str = config.nallo.slurm.account + self.email: str = config.nallo.slurm.mail_user + self.compute_env_base: str = config.nallo.compute_env + self.revision: str = config.nallo.revision + self.nextflow_binary_path: str = config.nallo.binary_path + + @property + def sample_sheet_headers(self) -> list[str]: + """Headers for sample sheet.""" + return NalloSampleSheetHeaders.list() + + def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]: + """Collect and format information required to build a sample sheet for a single sample.""" + read_file_paths = self.get_bam_read_file_paths(sample=case_sample.sample) + sample_sheet_entries = [] + + for bam_path in read_file_paths: + sample_sheet_entry = NalloSampleSheetEntry( + project=case_sample.case.internal_id, + sample=case_sample.sample.internal_id, + read_file=Path(bam_path), + family_id=case_sample.case.internal_id, + paternal_id=case_sample.get_paternal_sample_id or "0", + maternal_id=case_sample.get_maternal_sample_id or "0", + sex=self.get_sex_code(case_sample.sample.sex), + phenotype=self.get_phenotype_code(case_sample.status), + ) + sample_sheet_entries.extend(sample_sheet_entry.reformat_sample_content) + return sample_sheet_entries + + @staticmethod + def get_phenotype_code(phenotype: str) -> int: + """Return Nallo phenotype code.""" + LOG.debug("Translate phenotype to integer code") + try: + code = PlinkPhenotypeStatus[phenotype.upper()] + except KeyError: + raise ValueError(f"{phenotype} is not a valid phenotype") + return code + + @staticmethod + def get_sex_code(sex: str) -> int: + """Return Nallo sex code.""" + LOG.debug("Translate sex to integer code") + try: + code = PlinkSex[sex.upper()] + except KeyError: + raise ValueError(f"{sex} is not a valid sex") + return code + + def get_built_workflow_parameters(self, case_id: str) -> NalloParameters: + """Return parameters.""" + outdir = self.get_case_path(case_id=case_id) + + return NalloParameters( + input=self.get_sample_sheet_path(case_id=case_id), + outdir=outdir, + ) diff --git a/cg/meta/workflow/nf_analysis.py b/cg/meta/workflow/nf_analysis.py index 0b0d985444..a0acdf0bf6 100644 --- a/cg/meta/workflow/nf_analysis.py +++ b/cg/meta/workflow/nf_analysis.py @@ -18,6 +18,7 @@ WorkflowManager, ) from cg.constants.gene_panel import GenePanelGenomeBuild +from cg.constants.housekeeper_tags import AlignmentFileTag from cg.constants.nextflow import NFX_WORK_DIR from cg.constants.nf_analysis import NfTowerStatus from cg.constants.tb import AnalysisStatus @@ -252,6 +253,15 @@ def get_paired_read_paths(self, sample=Sample) -> tuple[list[str], list[str]]: ) return fastq_forward_read_paths, fastq_reverse_read_paths + def get_bam_read_file_paths(self, sample=Sample) -> list[Path]: + """Gather BAM file path for a sample based on the BAM tag.""" + return [ + Path(hk_file.full_path) + for hk_file in self.housekeeper_api.files( + bundle=sample.internal_id, tags={AlignmentFileTag.BAM} + ) + ] + def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[list[str]]: """Collect and format information required to build a sample sheet for a single sample.""" raise NotImplementedError diff --git a/cg/models/cg_config.py b/cg/models/cg_config.py index bab7e52a26..54cea8deb1 100644 --- a/cg/models/cg_config.py +++ b/cg/models/cg_config.py @@ -211,6 +211,24 @@ class MipConfig(BaseModel): script: str +class NalloConfig(CommonAppConfig): + binary_path: str | None = None + compute_env: str + conda_binary: str | None = None + conda_env: str + platform: str + params: str + config: str + resources: str + launch_directory: str + workflow_bin_path: str + profile: str + revision: str + root: str + slurm: SlurmConfig + tower_workflow: str + + class RarediseaseConfig(CommonAppConfig): binary_path: str | None = None compute_env: str @@ -442,6 +460,7 @@ class CGConfig(BaseModel): mip_rd_dna: MipConfig | None = Field(None, alias="mip-rd-dna") mip_rd_rna: MipConfig | None = Field(None, alias="mip-rd-rna") mutant: MutantConfig | None = None + nallo: NalloConfig | None = None raredisease: RarediseaseConfig | None = None rnafusion: RnafusionConfig | None = None statina: StatinaConfig | None = None diff --git a/cg/models/nallo/__init__.py b/cg/models/nallo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/models/nallo/nallo.py b/cg/models/nallo/nallo.py new file mode 100644 index 0000000000..c5463d5fe1 --- /dev/null +++ b/cg/models/nallo/nallo.py @@ -0,0 +1,63 @@ +from enum import StrEnum +from pathlib import Path + +from pydantic import BaseModel, field_validator + +from cg.exc import NfSampleSheetError +from cg.models.nf_analysis import WorkflowParameters + + +class NalloSampleSheetEntry(BaseModel): + """Nallo sample model is used when building the sample sheet.""" + + project: str + sample: str + read_file: Path + family_id: str + paternal_id: str + maternal_id: str + sex: int + phenotype: int + + @property + def reformat_sample_content(self) -> list[list[str]]: + """Reformat sample sheet content as a list of lists, where each list represents a line in the final file.""" + return [ + [ + self.project, + self.sample, + self.read_file, + self.family_id, + self.paternal_id, + self.maternal_id, + self.sex, + self.phenotype, + ] + ] + + @field_validator("read_file") + @classmethod + def read_file_exists(cls, bam_path: Path) -> Path: + """Verify that bam files exist.""" + if not bam_path.is_file(): + raise NfSampleSheetError(f"Bam file does not exist: {str(bam_path)}") + return bam_path + + +class NalloSampleSheetHeaders(StrEnum): + project: str = "project" + sample: str = "sample" + file: str = "file" + family_id: str = "family_id" + paternal_id: str = "paternal_id" + maternal_id: str = "maternal_id" + sex: str = "sex" + phenotype: str = "phenotype" + + @classmethod + def list(cls) -> list[str]: + return list(map(lambda header: header.value, cls)) + + +class NalloParameters(WorkflowParameters): + """Model for Nallo parameters.""" diff --git a/tests/cli/workflow/nf_analysis/test_cli_config_case.py b/tests/cli/workflow/nf_analysis/test_cli_config_case.py index 87b9143c34..f3c5dcc834 100644 --- a/tests/cli/workflow/nf_analysis/test_cli_config_case.py +++ b/tests/cli/workflow/nf_analysis/test_cli_config_case.py @@ -24,7 +24,7 @@ @pytest.mark.parametrize( "workflow", - NEXTFLOW_WORKFLOWS, + NEXTFLOW_WORKFLOWS + [Workflow.NALLO], ) def test_config_case_without_options( cli_runner: CliRunner, workflow: Workflow, request: FixtureRequest diff --git a/tests/conftest.py b/tests/conftest.py index 8d2b9210fe..1a5ea4507e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,7 +32,7 @@ from cg.constants import FileExtensions, SequencingFileTag, Workflow from cg.constants.constants import CaseActions, CustomerId, FileFormat, GenomeVersion, Strandedness from cg.constants.gene_panel import GenePanelMasterList -from cg.constants.housekeeper_tags import HK_DELIVERY_REPORT_TAG +from cg.constants.housekeeper_tags import HK_DELIVERY_REPORT_TAG, AlignmentFileTag from cg.constants.priority import SlurmQos from cg.constants.scout import ScoutExportFileName from cg.constants.sequencing import SequencingPlatform @@ -52,6 +52,7 @@ from cg.models import CompressionData from cg.models.cg_config import CGConfig, PDCArchivingDirectory from cg.models.downsample.downsample_data import DownsampleData +from cg.models.nallo.nallo import NalloSampleSheetHeaders from cg.models.raredisease.raredisease import RarediseaseParameters, RarediseaseSampleSheetHeaders from cg.models.rnafusion.rnafusion import RnafusionParameters, RnafusionSampleSheetEntry from cg.models.run_devices.illumina_run_directory_data import IlluminaRunDirectoryData @@ -1434,6 +1435,12 @@ def metagenomics_application_tag() -> str: return "METPCFR030" +@pytest.fixture +def wgs_long_read_application_tag() -> str: + """Return the raw data bam application tag.""" + return "LWPBELB070" + + @pytest.fixture def store() -> Generator[Store, None, None]: """Return a CG store.""" @@ -1899,6 +1906,7 @@ def context_config( conda_binary: Path, balsamic_dir: Path, microsalt_dir: Path, + nallo_dir: Path, raredisease_dir: Path, rnafusion_dir: Path, taxprofiler_dir: Path, @@ -2091,6 +2099,27 @@ def context_config( "conda_env": "S_mutant", "root": str(mip_dir), }, + "nallo": { + "binary_path": nextflow_binary.as_posix(), + "compute_env": "nf_tower_compute_env", + "conda_binary": conda_binary.as_posix(), + "conda_env": "S_nallo", + "platform": str(nf_analysis_platform_config_path), + "params": str(nf_analysis_pipeline_params_path), + "config": str(nf_analysis_pipeline_config_path), + "resources": str(nf_analysis_pipeline_resource_optimisation_path), + "launch_directory": Path("path", "to", "launchdir").as_posix(), + "workflow_bin_path": Path("workflow", "path").as_posix(), + "profile": "myprofile", + "references": Path("path", "to", "references").as_posix(), + "revision": "dev", + "root": str(nallo_dir), + "slurm": { + "account": "development", + "mail_user": email_address, + }, + "tower_workflow": "nallo", + }, "raredisease": { "binary_path": nextflow_binary.as_posix(), "compute_env": "nf_tower_compute_env", @@ -2476,6 +2505,19 @@ def mock_fastq_files(fastq_forward_read_path: Path, fastq_reverse_read_path: Pat return [fastq_forward_read_path, fastq_reverse_read_path] +@pytest.fixture(scope="session") +def bam_unmapped_read_paths(housekeeper_dir: Path) -> Path: + """Path to existing bam read file.""" + bam_unmapped_read_path = Path( + housekeeper_dir, "m00000_000000_000000_s4.hifi_reads.bc2021" + ).with_suffix(f"{AlignmentFileTag.BAM}") + with open(bam_unmapped_read_path, "wb") as wh: + wh.write( + b"1f 8b 08 04 00 00 00 00 00 ff 06 00 42 43 02 00 1b 00 03 00 00 00 00 00 00 00 00 00" + ) + return bam_unmapped_read_path + + @pytest.fixture(scope="session") def sequencing_platform() -> str: """Return a default sequencing platform.""" @@ -2483,15 +2525,131 @@ def sequencing_platform() -> str: # Nallo fixtures +@pytest.fixture(scope="session") +def nallo_case_id() -> str: + """Returns a nallo case id.""" + return "nallo_case_two_samples" + + @pytest.fixture(scope="function") def nallo_context( cg_context: CGConfig, + helpers: StoreHelpers, + nf_analysis_housekeeper: HousekeeperAPI, + trailblazer_api: MockTB, + hermes_api: HermesApi, + cg_dir: Path, + nallo_case_id: str, + sample_id: str, + sample_name: str, + another_sample_name: str, + father_sample_id: str, + no_sample_case_id: str, + wgs_long_read_application_tag: str, ) -> CGConfig: - """Context to use in cli.""" + """Context to use in CLI.""" + cg_context.housekeeper_api_ = nf_analysis_housekeeper + cg_context.trailblazer_api_ = trailblazer_api cg_context.meta_apis["analysis_api"] = NalloAnalysisAPI(config=cg_context) + status_db: Store = cg_context.status_db + + # Create ERROR case with NO SAMPLES + helpers.add_case(status_db, internal_id=no_sample_case_id, name=no_sample_case_id) + + # Create textbook case with two samples + nallo_case_two_samples: Case = helpers.add_case( + store=status_db, + internal_id=nallo_case_id, + name=nallo_case_id, + data_analysis=Workflow.NALLO, + ) + + nallo_sample_one: Sample = helpers.add_sample( + status_db, + internal_id=sample_id, + name=sample_name, + last_sequenced_at=datetime.now(), + application_tag=wgs_long_read_application_tag, + reference_genome=GenomeVersion.HG38, + ) + + another_nallo_sample: Sample = helpers.add_sample( + status_db, + internal_id=father_sample_id, + name=another_sample_name, + last_sequenced_at=datetime.now(), + application_tag=wgs_long_read_application_tag, + reference_genome=GenomeVersion.HG38, + ) + + helpers.add_relationship( + status_db, + case=nallo_case_two_samples, + sample=nallo_sample_one, + ) + + helpers.add_relationship( + status_db, + case=nallo_case_two_samples, + sample=another_nallo_sample, + ) return cg_context +@pytest.fixture(scope="function") +def nallo_dir(tmpdir_factory, apps_dir: Path) -> str: + """Return the path to the nallo apps dir.""" + nallo_dir = tmpdir_factory.mktemp("nallo") + return Path(nallo_dir).absolute().as_posix() + + +@pytest.fixture(scope="function") +def nallo_nexflow_config_file_path(nallo_dir, nallo_case_id) -> Path: + """Path to config file.""" + return Path(nallo_dir, nallo_case_id, f"{nallo_case_id}_nextflow_config").with_suffix( + FileExtensions.JSON + ) + + +@pytest.fixture(scope="function") +def nallo_params_file_path(nallo_dir, nallo_case_id) -> Path: + """Path to parameters file.""" + return Path(nallo_dir, nallo_case_id, f"{nallo_case_id}_params_file").with_suffix( + FileExtensions.YAML + ) + + +@pytest.fixture(scope="function") +def nallo_sample_sheet_content( + sample_id: str, + nallo_case_id: str, + bam_unmapped_read_paths: Path, +) -> str: + """Return the expected sample sheet content for Nallo.""" + headers: str = ",".join(NalloSampleSheetHeaders.list()) + row: str = ",".join( + [ + nallo_case_id, + sample_id, + bam_unmapped_read_paths.as_posix(), + nallo_case_id, + "0", + "0", + "2", + "2", + ] + ) + return "\n".join([headers, row]) + + +@pytest.fixture(scope="function") +def nallo_sample_sheet_path(nallo_dir, nallo_case_id) -> Path: + """Path to sample sheet.""" + return Path(nallo_dir, nallo_case_id, f"{nallo_case_id}_samplesheet").with_suffix( + FileExtensions.CSV + ) + + # Raredisease fixtures @pytest.fixture(scope="function") def raredisease_dir(tmpdir_factory, apps_dir: Path) -> str: diff --git a/tests/meta/workflow/test_nallo.py b/tests/meta/workflow/test_nallo.py new file mode 100644 index 0000000000..498dab090f --- /dev/null +++ b/tests/meta/workflow/test_nallo.py @@ -0,0 +1,28 @@ +from cg.meta.workflow.nallo import NalloAnalysisAPI +from cg.models.cg_config import CGConfig + + +def test_get_sample_sheet_content( + nallo_context: CGConfig, + nallo_case_id: str, +): + """Test Nallo nextflow sample sheet creation.""" + + # GIVEN Nallo analysis API + analysis_api: NalloAnalysisAPI = nallo_context.meta_apis["analysis_api"] + + # WHEN getting the sample sheet content + result = analysis_api.get_sample_sheet_content(case_id=nallo_case_id) + + # THEN return should contain patterns + patterns = [ + "ADM1", + "m00000_000000_000000_s4.hifi_reads.bc2021.bam", + "nallo_case_two_samples", + ] + + contains_pattern = any( + any(any(pattern in sub_element for pattern in patterns) for sub_element in element) + for element in result + ) + assert contains_pattern