From a0e627ddc29719b03ceb648c16c07fd3b1804350 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Fri, 15 Dec 2023 16:05:23 +0100 Subject: [PATCH] Change logic, first adapt all indexes, then all barcode mismatches --- .../demultiplex/sample_sheet/sample_models.py | 36 +++++++++++-------- .../sample_sheet/sample_sheet_creator.py | 12 +++---- tests/apps/demultiplex/conftest.py | 12 ------- tests/apps/demultiplex/test_sample_models.py | 6 ++-- .../demultiplex/test_create_sample_sheet.py | 2 +- tests/conftest.py | 13 +++++++ 6 files changed, 43 insertions(+), 38 deletions(-) diff --git a/cg/apps/demultiplex/sample_sheet/sample_models.py b/cg/apps/demultiplex/sample_sheet/sample_models.py index 9177c212e7..af6433b42d 100644 --- a/cg/apps/demultiplex/sample_sheet/sample_models.py +++ b/cg/apps/demultiplex/sample_sheet/sample_models.py @@ -38,12 +38,15 @@ def separate_indexes(self) -> None: self.index2 = index2.strip() @abstractmethod - def process_sample_for_sample_sheet( - self, run_parameters: RunParameters, samples_to_compare: list | None = None - ): + def process_indexes(self, run_parameters: RunParameters): """Update the required attributes to be exported to a sample sheet.""" pass + @abstractmethod + def update_barcode_mismatches(self, samples_to_compare: list) -> None: + """Update the barcode_mismatches_1 and barcode_mismatches_2 attributes.""" + pass + class FlowCellSampleBcl2Fastq(FlowCellSample): """Base class for NovaSeq6000 flow cell samples.""" @@ -81,7 +84,7 @@ def _pad_indexes_if_necessary(self, run_parameters: RunParameters) -> None: return LOG.debug(f"Padding not necessary for sample {self.sample_id}") - def process_sample_for_sample_sheet(self, run_parameters: RunParameters, **kwargs): + def process_indexes(self, run_parameters: RunParameters): """Update the required attributes to be exported to a sample sheet.""" reverse_index2: bool = run_parameters.index_settings.should_i5_be_reverse_complimented self.separate_indexes() @@ -89,6 +92,10 @@ def process_sample_for_sample_sheet(self, run_parameters: RunParameters, **kwarg if reverse_index2: self.index2 = get_reverse_complement_dna_seq(self.index2) + def update_barcode_mismatches(self, samples_to_compare: list) -> None: + """No updating of barcode mismatch values for Bcl2Fastq sample.""" + LOG.debug(f"No updating of barcode mismatch values for Bcl2Fastq sample {self.sample_id}") + class FlowCellSampleBCLConvert(FlowCellSample): """Class that represents a NovaSeqX flow cell sample.""" @@ -145,7 +152,7 @@ def update_override_cycles(self, run_parameters: RunParameters) -> None: ) self.override_cycles = read1_cycles + index1_cycles + index2_cycles + read2_cycles - def update_barcode_mismatches_1(self, samples_to_compare: list) -> None: + def _update_barcode_mismatches_1(self, samples_to_compare: list) -> None: """Assign zero to barcode_mismatches_1 if the hamming distance between self.index and the index1 of any sample in the lane is below the minimum threshold.""" for sample in samples_to_compare: @@ -159,7 +166,7 @@ def update_barcode_mismatches_1(self, samples_to_compare: list) -> None: self.barcode_mismatches_1 = 0 break - def update_barcode_mismatches_2(self, samples_to_compare: list) -> None: + def _update_barcode_mismatches_2(self, samples_to_compare: list) -> None: """Assign zero to barcode_mismatches_2 if the hamming distance between self.index2 and the index2 of any sample in the lane is below the minimum threshold. If the sample is single-indexed, assign 'na'.""" @@ -178,15 +185,16 @@ def update_barcode_mismatches_2(self, samples_to_compare: list) -> None: self.barcode_mismatches_2 = 0 break - def process_sample_for_sample_sheet( - self, run_parameters: RunParameters, samples_to_compare: list | None = None - ): + def process_indexes(self, run_parameters: RunParameters): """Update the required attributes to be exported to a sample sheet.""" - if not samples_to_compare: - raise SampleSheetError("No samples to compare with to update barcode mismatch values") self.separate_indexes() if run_parameters.index_settings.should_i5_be_reverse_complimented: self.index2 = get_reverse_complement_dna_seq(self.index2) - self.update_override_cycles(run_parameters) - self.update_barcode_mismatches_1(samples_to_compare=samples_to_compare) - self.update_barcode_mismatches_2(samples_to_compare=samples_to_compare) + self.update_override_cycles(run_parameters=run_parameters) + + def update_barcode_mismatches(self, samples_to_compare: list) -> None: + """Update the barcode_mismatches_1 and barcode_mismatches_2 attributes.""" + if not samples_to_compare: + raise SampleSheetError("No samples to compare with to update barcode mismatch values") + self._update_barcode_mismatches_1(samples_to_compare=samples_to_compare) + self._update_barcode_mismatches_2(samples_to_compare=samples_to_compare) diff --git a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py index b5d1180301..ad3a6a41a2 100644 --- a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py +++ b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py @@ -138,16 +138,12 @@ def create_sample_sheet_content(self) -> list[list[str]]: def process_samples_for_sample_sheet(self) -> None: """Remove unwanted samples and adapt remaining samples.""" self.remove_unwanted_samples() - samples_in_lane: list[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq] + for sample in self.lims_samples: + sample.process_indexes(run_parameters=self.run_parameters) for lane, samples_in_lane in get_samples_by_lane(self.lims_samples).items(): - LOG.info( - "Adapting index, override cycles and barcode mismatch values " - f"for samples in lane {lane}" - ) + LOG.info(f"Updating barcode mismatch values for samples in lane {lane}") for sample in samples_in_lane: - sample.process_sample_for_sample_sheet( - run_parameters=self.run_parameters, samples_to_compare=samples_in_lane - ) + sample.update_barcode_mismatches(samples_to_compare=samples_in_lane) def construct_sample_sheet(self) -> list[list[str]]: """Construct and validate the sample sheet.""" diff --git a/tests/apps/demultiplex/conftest.py b/tests/apps/demultiplex/conftest.py index 689e090794..d0467cd5ee 100644 --- a/tests/apps/demultiplex/conftest.py +++ b/tests/apps/demultiplex/conftest.py @@ -56,18 +56,6 @@ def bcl2fastq_sample_sheet_creator( ) -@pytest.fixture -def bcl_convert_sample_sheet_creator( - bcl_convert_flow_cell: FlowCellDirectoryData, - lims_novaseq_bcl_convert_samples: list[FlowCellSampleBCLConvert], -) -> SampleSheetCreatorBCLConvert: - """Returns a sample sheet creator for version 2 sample sheets with dragen format.""" - return SampleSheetCreatorBCLConvert( - flow_cell=bcl_convert_flow_cell, - lims_samples=lims_novaseq_bcl_convert_samples, - ) - - # Sample sheet validation diff --git a/tests/apps/demultiplex/test_sample_models.py b/tests/apps/demultiplex/test_sample_models.py index 734bd28b9f..5934fedd6b 100644 --- a/tests/apps/demultiplex/test_sample_models.py +++ b/tests/apps/demultiplex/test_sample_models.py @@ -216,7 +216,7 @@ def test_update_barcode_mismatches_1( sample_to_update: FlowCellSampleBCLConvert = sample_list[0] # WHEN updating the barcode mismatches 1 - sample_to_update.update_barcode_mismatches_1(samples_to_compare=sample_list) + sample_to_update._update_barcode_mismatches_1(samples_to_compare=sample_list) # THEN the barcode mismatches 1 are updated with the expected value assert sample_to_update.barcode_mismatches_1 == expected_barcode_mismatch @@ -240,7 +240,7 @@ def test_update_barcode_mismatches_2( sample_to_update: FlowCellSampleBCLConvert = sample_list[0] # WHEN updating the barcode mismatches 2 - sample_to_update.update_barcode_mismatches_2(samples_to_compare=sample_list) + sample_to_update._update_barcode_mismatches_2(samples_to_compare=sample_list) # THEN the barcode mismatches 1 are updated with the expected value assert sample_to_update.barcode_mismatches_2 == expected_barcode_mismatch @@ -278,7 +278,7 @@ def test_process_sample_for_sample_sheet_bcl_convert( sample: FlowCellSampleBCLConvert = raw_lims_samples[0] # WHEN processing the sample for a sample sheet - sample.process_sample_for_sample_sheet(run_parameters=run_parameters) + sample.process_indexes(run_parameters=run_parameters) # THEN the sample is processed assert sample.barcode_mismatches_1 diff --git a/tests/cli/demultiplex/test_create_sample_sheet.py b/tests/cli/demultiplex/test_create_sample_sheet.py index aa0133cc49..72c1279da1 100644 --- a/tests/cli/demultiplex/test_create_sample_sheet.py +++ b/tests/cli/demultiplex/test_create_sample_sheet.py @@ -122,7 +122,7 @@ class SampleSheetScenario(BaseModel): ), SampleSheetScenario( flow_cell_directory="novaseq_6000_post_1_5_kits_flow_cell", - lims_samples="novaseq_6000_post_1_5_kits_samples", + lims_samples="novaseq_6000_post_1_5_kits_lims_samples", correct_sample_sheet="novaseq_6000_post_1_5_kits_correct_sample_sheet", ), SampleSheetScenario( diff --git a/tests/conftest.py b/tests/conftest.py index 87f4ff6879..a6c7f500c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,7 @@ FlowCellSampleBcl2Fastq, FlowCellSampleBCLConvert, ) +from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreatorBCLConvert from cg.apps.downsample.downsample import DownsampleAPI from cg.apps.gens import GensAPI from cg.apps.gt import GenotypeAPI @@ -1006,6 +1007,18 @@ def sample_sheet_context( return cg_context +@pytest.fixture +def bcl_convert_sample_sheet_creator( + bcl_convert_flow_cell: FlowCellDirectoryData, + lims_novaseq_bcl_convert_samples: list[FlowCellSampleBCLConvert], +) -> SampleSheetCreatorBCLConvert: + """Returns a sample sheet creator for version 2 sample sheets with dragen format.""" + return SampleSheetCreatorBCLConvert( + flow_cell=bcl_convert_flow_cell, + lims_samples=lims_novaseq_bcl_convert_samples, + ) + + @pytest.fixture(scope="session") def bcl_convert_demultiplexed_flow_cell_sample_internal_ids() -> list[str]: """