Skip to content

Commit

Permalink
Change logic, first adapt all indexes, then all barcode mismatches
Browse files Browse the repository at this point in the history
  • Loading branch information
diitaz93 committed Dec 15, 2023
1 parent acfd228 commit a0e627d
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 38 deletions.
36 changes: 22 additions & 14 deletions cg/apps/demultiplex/sample_sheet/sample_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ def separate_indexes(self) -> None:
self.index2 = index2.strip()

@abstractmethod
def process_sample_for_sample_sheet(
self, run_parameters: RunParameters, samples_to_compare: list | None = None
):
def process_indexes(self, run_parameters: RunParameters):
"""Update the required attributes to be exported to a sample sheet."""
pass

@abstractmethod
def update_barcode_mismatches(self, samples_to_compare: list) -> None:
"""Update the barcode_mismatches_1 and barcode_mismatches_2 attributes."""
pass


class FlowCellSampleBcl2Fastq(FlowCellSample):
"""Base class for NovaSeq6000 flow cell samples."""
Expand Down Expand Up @@ -81,14 +84,18 @@ def _pad_indexes_if_necessary(self, run_parameters: RunParameters) -> None:
return
LOG.debug(f"Padding not necessary for sample {self.sample_id}")

def process_sample_for_sample_sheet(self, run_parameters: RunParameters, **kwargs):
def process_indexes(self, run_parameters: RunParameters):
"""Update the required attributes to be exported to a sample sheet."""
reverse_index2: bool = run_parameters.index_settings.should_i5_be_reverse_complimented
self.separate_indexes()
self._pad_indexes_if_necessary(run_parameters=run_parameters)
if reverse_index2:
self.index2 = get_reverse_complement_dna_seq(self.index2)

def update_barcode_mismatches(self, samples_to_compare: list) -> None:
"""No updating of barcode mismatch values for Bcl2Fastq sample."""
LOG.debug(f"No updating of barcode mismatch values for Bcl2Fastq sample {self.sample_id}")


class FlowCellSampleBCLConvert(FlowCellSample):
"""Class that represents a NovaSeqX flow cell sample."""
Expand Down Expand Up @@ -145,7 +152,7 @@ def update_override_cycles(self, run_parameters: RunParameters) -> None:
)
self.override_cycles = read1_cycles + index1_cycles + index2_cycles + read2_cycles

def update_barcode_mismatches_1(self, samples_to_compare: list) -> None:
def _update_barcode_mismatches_1(self, samples_to_compare: list) -> None:
"""Assign zero to barcode_mismatches_1 if the hamming distance between self.index
and the index1 of any sample in the lane is below the minimum threshold."""
for sample in samples_to_compare:
Expand All @@ -159,7 +166,7 @@ def update_barcode_mismatches_1(self, samples_to_compare: list) -> None:
self.barcode_mismatches_1 = 0
break

def update_barcode_mismatches_2(self, samples_to_compare: list) -> None:
def _update_barcode_mismatches_2(self, samples_to_compare: list) -> None:
"""Assign zero to barcode_mismatches_2 if the hamming distance between self.index2
and the index2 of any sample in the lane is below the minimum threshold.
If the sample is single-indexed, assign 'na'."""
Expand All @@ -178,15 +185,16 @@ def update_barcode_mismatches_2(self, samples_to_compare: list) -> None:
self.barcode_mismatches_2 = 0
break

def process_sample_for_sample_sheet(
self, run_parameters: RunParameters, samples_to_compare: list | None = None
):
def process_indexes(self, run_parameters: RunParameters):
"""Update the required attributes to be exported to a sample sheet."""
if not samples_to_compare:
raise SampleSheetError("No samples to compare with to update barcode mismatch values")
self.separate_indexes()
if run_parameters.index_settings.should_i5_be_reverse_complimented:
self.index2 = get_reverse_complement_dna_seq(self.index2)
self.update_override_cycles(run_parameters)
self.update_barcode_mismatches_1(samples_to_compare=samples_to_compare)
self.update_barcode_mismatches_2(samples_to_compare=samples_to_compare)
self.update_override_cycles(run_parameters=run_parameters)

def update_barcode_mismatches(self, samples_to_compare: list) -> None:
"""Update the barcode_mismatches_1 and barcode_mismatches_2 attributes."""
if not samples_to_compare:
raise SampleSheetError("No samples to compare with to update barcode mismatch values")
self._update_barcode_mismatches_1(samples_to_compare=samples_to_compare)
self._update_barcode_mismatches_2(samples_to_compare=samples_to_compare)
12 changes: 4 additions & 8 deletions cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,12 @@ def create_sample_sheet_content(self) -> list[list[str]]:
def process_samples_for_sample_sheet(self) -> None:
"""Remove unwanted samples and adapt remaining samples."""
self.remove_unwanted_samples()
samples_in_lane: list[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq]
for sample in self.lims_samples:
sample.process_indexes(run_parameters=self.run_parameters)
for lane, samples_in_lane in get_samples_by_lane(self.lims_samples).items():
LOG.info(
"Adapting index, override cycles and barcode mismatch values "
f"for samples in lane {lane}"
)
LOG.info(f"Updating barcode mismatch values for samples in lane {lane}")
for sample in samples_in_lane:
sample.process_sample_for_sample_sheet(
run_parameters=self.run_parameters, samples_to_compare=samples_in_lane
)
sample.update_barcode_mismatches(samples_to_compare=samples_in_lane)

def construct_sample_sheet(self) -> list[list[str]]:
"""Construct and validate the sample sheet."""
Expand Down
12 changes: 0 additions & 12 deletions tests/apps/demultiplex/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,6 @@ def bcl2fastq_sample_sheet_creator(
)


@pytest.fixture
def bcl_convert_sample_sheet_creator(
bcl_convert_flow_cell: FlowCellDirectoryData,
lims_novaseq_bcl_convert_samples: list[FlowCellSampleBCLConvert],
) -> SampleSheetCreatorBCLConvert:
"""Returns a sample sheet creator for version 2 sample sheets with dragen format."""
return SampleSheetCreatorBCLConvert(
flow_cell=bcl_convert_flow_cell,
lims_samples=lims_novaseq_bcl_convert_samples,
)


# Sample sheet validation


Expand Down
6 changes: 3 additions & 3 deletions tests/apps/demultiplex/test_sample_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_update_barcode_mismatches_1(
sample_to_update: FlowCellSampleBCLConvert = sample_list[0]

# WHEN updating the barcode mismatches 1
sample_to_update.update_barcode_mismatches_1(samples_to_compare=sample_list)
sample_to_update._update_barcode_mismatches_1(samples_to_compare=sample_list)

# THEN the barcode mismatches 1 are updated with the expected value
assert sample_to_update.barcode_mismatches_1 == expected_barcode_mismatch
Expand All @@ -240,7 +240,7 @@ def test_update_barcode_mismatches_2(
sample_to_update: FlowCellSampleBCLConvert = sample_list[0]

# WHEN updating the barcode mismatches 2
sample_to_update.update_barcode_mismatches_2(samples_to_compare=sample_list)
sample_to_update._update_barcode_mismatches_2(samples_to_compare=sample_list)

# THEN the barcode mismatches 2 are updated with the expected value
assert sample_to_update.barcode_mismatches_2 == expected_barcode_mismatch
Expand Down Expand Up @@ -278,7 +278,7 @@ def test_process_sample_for_sample_sheet_bcl_convert(
sample: FlowCellSampleBCLConvert = raw_lims_samples[0]

# WHEN processing the sample for a sample sheet
sample.process_sample_for_sample_sheet(run_parameters=run_parameters)
sample.process_indexes(run_parameters=run_parameters)

# THEN the sample is processed
assert sample.barcode_mismatches_1
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/demultiplex/test_create_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class SampleSheetScenario(BaseModel):
),
SampleSheetScenario(
flow_cell_directory="novaseq_6000_post_1_5_kits_flow_cell",
lims_samples="novaseq_6000_post_1_5_kits_samples",
lims_samples="novaseq_6000_post_1_5_kits_lims_samples",
correct_sample_sheet="novaseq_6000_post_1_5_kits_correct_sample_sheet",
),
SampleSheetScenario(
Expand Down
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
FlowCellSampleBcl2Fastq,
FlowCellSampleBCLConvert,
)
from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreatorBCLConvert
from cg.apps.downsample.downsample import DownsampleAPI
from cg.apps.gens import GensAPI
from cg.apps.gt import GenotypeAPI
Expand Down Expand Up @@ -1006,6 +1007,18 @@ def sample_sheet_context(
return cg_context


@pytest.fixture
def bcl_convert_sample_sheet_creator(
bcl_convert_flow_cell: FlowCellDirectoryData,
lims_novaseq_bcl_convert_samples: list[FlowCellSampleBCLConvert],
) -> SampleSheetCreatorBCLConvert:
"""Returns a sample sheet creator for version 2 sample sheets with dragen format."""
return SampleSheetCreatorBCLConvert(
flow_cell=bcl_convert_flow_cell,
lims_samples=lims_novaseq_bcl_convert_samples,
)


@pytest.fixture(scope="session")
def bcl_convert_demultiplexed_flow_cell_sample_internal_ids() -> list[str]:
"""
Expand Down

0 comments on commit a0e627d

Please sign in to comment.