Skip to content

Commit e65ae73

Browse files
author
ChristianOertlin
authored
Re-route flow to illumina device tables. (#3349)(major)
# Description Feature branch to re-route the post-processing database flow from the `Flowcell` -> `SampleLaneSequencingMetrics` model paradigm to `IlluminaFlowCell` -> `IlluminaSequencingRun` -> `IlluminaSampleSequencingMetrics`. A feature branch is used so that all changes can be collected and tested together. Since we need to alter core logic that requires a database migration, we cannot make these changes step by step. PR in servers: https://github.com/Clinical-Genomics/servers/pull/1394 Data migration script: https://github.com/Clinical-Genomics/add-new-tech/issues/7 ## Deploy ```shell bash /home/proj/production/servers/resources/hasta.scilifelab.se/update-tool-stage.sh -e S_cg -t cg -b develop-illumina-devices -a ``` ## CLI command changes Commands that need to be updated in servers: - [x] `cg clean flow-cells` -> `cg clean illumina-runs` (requires servers update, PR: <placeholder>) - [x] `cg backup encrypt-flow-cells` -> `cg backup encrypt-illumina-runs` (requires servers update, PR: <placeholder>) - [x] `cg backup flow-cells` -> `cg backup illumina-runs` (requires servers update, PR: <placeholder>) - [x] `cg backup fetch-flow-cell` -> `cg backup fetch-illumina-run` (no update needed) - [x] `cg demultiplex finish flow-cell` -> `cg demultiplex finish illumina-run` (no update needed in servers) - [x] `cg get flow-cell` -> `cg get sequencing-run` (Does not require a PR in servers) - [x] `cg set flowcell` -> `cg set sequencing-run` (Does not require a PR in servers) - [x] `cg store flow-cell` -> `cg store illumina-run` (Does not require a PR in servers) - [x] `cg store demultiplexed-flow-cell` -> `cg store demultiplexed-run` (Does not require a PR in servers) - [x] `cg decompress flow-cell` -> `cg decompress illumina-run` (Does not require a PR in servers)
1 parent e29050b commit e65ae73

File tree

166 files changed

+5676
-8939
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

166 files changed

+5676
-8939
lines changed

cg/apps/demultiplex/sample_sheet/api.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33

44
import click
55

6-
from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_flow_cell_samples_from_content
7-
from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample
6+
from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_samples_from_content
7+
from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
88
from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreator
99
from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator
10+
from cg.apps.demultiplex.sample_sheet.utils import (
11+
delete_sample_sheet_from_housekeeper,
12+
add_and_include_sample_sheet_path_to_housekeeper,
13+
)
1014
from cg.apps.housekeeper.hk import HousekeeperAPI
1115
from cg.apps.lims import LimsAPI
1216
from cg.apps.lims.sample_sheet import get_flow_cell_samples
@@ -22,17 +26,13 @@
2226
SampleSheetFormatError,
2327
)
2428
from cg.io.controller import ReadFile, WriteFile, WriteStream
25-
from cg.meta.demultiplex.housekeeper_storage_functions import (
26-
add_and_include_sample_sheet_path_to_housekeeper,
27-
delete_sample_sheet_from_housekeeper,
28-
)
2929
from cg.models.run_devices.illumina_run_directory_data import IlluminaRunDirectoryData
3030
from cg.utils.files import get_directories_in_path, link_or_overwrite_file
3131

3232
LOG = logging.getLogger(__name__)
3333

3434

35-
class SampleSheetAPI:
35+
class IlluminaSampleSheetService:
3636
"""Sample Sheet API class."""
3737

3838
def __init__(self, flow_cell_dir: str, hk_api: HousekeeperAPI, lims_api: LimsAPI) -> None:
@@ -115,19 +115,19 @@ def _replace_sample_header(sample_sheet_content: list[list[str]]) -> list[list[s
115115

116116
def translate_sample_sheet(self, flow_cell_name: str) -> None:
117117
"""Translate a Bcl2Fastq sample sheet to a BCLConvert sample sheet."""
118-
flow_cell: IlluminaRunDirectoryData = self._get_flow_cell(flow_cell_name)
119-
if not self._are_necessary_files_in_flow_cell(flow_cell):
118+
run_directory_data: IlluminaRunDirectoryData = self._get_flow_cell(flow_cell_name)
119+
if not self._are_necessary_files_in_flow_cell(run_directory_data):
120120
raise MissingFilesError("Missing necessary files in run directory for translation")
121121
original_content: list[list[str]] = ReadFile.get_content_from_file(
122-
file_format=FileFormat.CSV, file_path=flow_cell.sample_sheet_path
122+
file_format=FileFormat.CSV, file_path=run_directory_data.sample_sheet_path
123123
)
124124
content_with_fixed_header: list[list[str]] = self._replace_sample_header(original_content)
125125

126-
flow_cell_samples: list[FlowCellSample] = get_flow_cell_samples_from_content(
126+
samples: list[IlluminaSampleIndexSetting] = get_samples_from_content(
127127
sample_sheet_content=content_with_fixed_header
128128
)
129129
bcl_convert_creator = SampleSheetCreator(
130-
flow_cell=flow_cell, lims_samples=flow_cell_samples
130+
run_directory_data=run_directory_data, samples=samples
131131
)
132132
new_content = bcl_convert_creator.construct_sample_sheet()
133133
self.validator.validate_sample_sheet_from_content(new_content)
@@ -141,15 +141,17 @@ def translate_sample_sheet(self, flow_cell_name: str) -> None:
141141
WriteFile.write_file_from_content(
142142
content=new_content,
143143
file_format=FileFormat.CSV,
144-
file_path=flow_cell.sample_sheet_path,
144+
file_path=run_directory_data.sample_sheet_path,
145145
)
146146

147-
def _use_sample_sheet_from_housekeeper(self, flow_cell: IlluminaRunDirectoryData) -> None:
147+
def _use_sample_sheet_from_housekeeper(
148+
self, run_directory_data: IlluminaRunDirectoryData
149+
) -> None:
148150
"""
149151
Copy the sample sheet from Housekeeper to the flow cell directory if it exists and is valid.
150152
"""
151-
sample_sheet_path: Path = self.hk_api.get_sample_sheet_path(flow_cell.id)
152-
flow_cell.set_sample_sheet_path_hk(sample_sheet_path)
153+
sample_sheet_path: Path = self.hk_api.get_sample_sheet_path(run_directory_data.id)
154+
run_directory_data.set_sample_sheet_path_hk(sample_sheet_path)
153155
self.validate_sample_sheet(sample_sheet_path)
154156

155157
if self.dry_run:
@@ -159,7 +161,7 @@ def _use_sample_sheet_from_housekeeper(self, flow_cell: IlluminaRunDirectoryData
159161
)
160162
return
161163
LOG.info("Sample sheet from Housekeeper is valid. Copying it to sequencing run directory")
162-
link_or_overwrite_file(src=sample_sheet_path, dst=flow_cell.sample_sheet_path)
164+
link_or_overwrite_file(src=sample_sheet_path, dst=run_directory_data.sample_sheet_path)
163165

164166
def _use_flow_cell_sample_sheet(self, flow_cell: IlluminaRunDirectoryData) -> None:
165167
"""Use the sample sheet from the flow cell directory if it is valid."""
@@ -186,7 +188,7 @@ def _get_sample_sheet_content(self, flow_cell: IlluminaRunDirectoryData) -> list
186188
Raises:
187189
LimsDataError: If no samples are found in LIMS for the flow cell.
188190
"""
189-
lims_samples: list[FlowCellSample] = list(
191+
lims_samples: list[IlluminaSampleIndexSetting] = list(
190192
get_flow_cell_samples(
191193
lims=self.lims_api,
192194
flow_cell_id=flow_cell.id,
@@ -196,7 +198,7 @@ def _get_sample_sheet_content(self, flow_cell: IlluminaRunDirectoryData) -> list
196198
message: str = f"Could not find any samples in LIMS for {flow_cell.id}"
197199
LOG.warning(message)
198200
raise LimsDataError(message)
199-
creator = SampleSheetCreator(flow_cell=flow_cell, lims_samples=lims_samples)
201+
creator = SampleSheetCreator(run_directory_data=flow_cell, samples=lims_samples)
200202
LOG.info(
201203
f"Constructing sample sheet for the {flow_cell.sequencer_type} flow cell {flow_cell.id}"
202204
)

cg/apps/demultiplex/sample_sheet/read_sample_sheet.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
from pydantic import TypeAdapter
44

5-
from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample
5+
from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
66
from cg.constants.demultiplexing import SampleSheetBcl2FastqSections, SampleSheetBCLConvertSections
77
from cg.exc import SampleSheetContentError, SampleSheetFormatError
88

99
LOG = logging.getLogger(__name__)
1010

1111

12-
def validate_samples_are_unique(samples: list[FlowCellSample]) -> None:
12+
def validate_samples_are_unique(samples: list[IlluminaSampleIndexSetting]) -> None:
1313
"""Validate that each sample only exists once."""
1414
sample_ids: set = set()
1515
for sample in samples:
@@ -21,9 +21,9 @@ def validate_samples_are_unique(samples: list[FlowCellSample]) -> None:
2121
sample_ids.add(sample_id)
2222

2323

24-
def validate_samples_unique_per_lane(samples: list[FlowCellSample]) -> None:
24+
def validate_samples_unique_per_lane(samples: list[IlluminaSampleIndexSetting]) -> None:
2525
"""Validate that each sample only exists once per lane in a sample sheet."""
26-
sample_by_lane: dict[int, list[FlowCellSample]] = get_samples_by_lane(samples)
26+
sample_by_lane: dict[int, list[IlluminaSampleIndexSetting]] = get_samples_by_lane(samples)
2727
for lane, lane_samples in sample_by_lane.items():
2828
LOG.debug(f"Validate that samples are unique in lane: {lane}")
2929
validate_samples_are_unique(samples=lane_samples)
@@ -59,28 +59,28 @@ def get_raw_samples_from_content(sample_sheet_content: list[list[str]]) -> list[
5959

6060

6161
def get_samples_by_lane(
62-
samples: list[FlowCellSample],
63-
) -> dict[int, list[FlowCellSample]]:
62+
samples: list[IlluminaSampleIndexSetting],
63+
) -> dict[int, list[IlluminaSampleIndexSetting]]:
6464
"""Group and return samples by lane."""
6565
LOG.debug("Order samples by lane")
66-
sample_by_lane: dict[int, list[FlowCellSample]] = {}
66+
sample_by_lane: dict[int, list[IlluminaSampleIndexSetting]] = {}
6767
for sample in samples:
6868
if sample.lane not in sample_by_lane:
6969
sample_by_lane[sample.lane] = []
7070
sample_by_lane[sample.lane].append(sample)
7171
return sample_by_lane
7272

7373

74-
def get_flow_cell_samples_from_content(
74+
def get_samples_from_content(
7575
sample_sheet_content: list[list[str]],
76-
) -> list[FlowCellSample]:
76+
) -> list[IlluminaSampleIndexSetting]:
7777
"""
78-
Return the samples in a sample sheet as a list of FlowCellSample objects.
78+
Return the samples in a sample sheet as a list of IlluminaSampleIndexSetting objects.
7979
Raises:
8080
ValidationError: if the samples do not have the correct attributes based on their model.
8181
"""
8282
raw_samples: list[dict[str, str]] = get_raw_samples_from_content(
8383
sample_sheet_content=sample_sheet_content
8484
)
85-
adapter = TypeAdapter(list[FlowCellSample])
85+
adapter = TypeAdapter(list[IlluminaSampleIndexSetting])
8686
return adapter.validate_python(raw_samples)

cg/apps/demultiplex/sample_sheet/sample_models.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
LOG = logging.getLogger(__name__)
1818

1919

20-
class FlowCellSample(BaseModel):
21-
"""Class that represents a flow cell sample."""
20+
class IlluminaSampleIndexSetting(BaseModel):
21+
"""Class that represents index settings for a sample on an Illumina run."""
2222

2323
lane: int = Field(..., alias=SampleSheetBCLConvertSections.Data.LANE)
2424
sample_id: SampleId = Field(..., alias=SampleSheetBCLConvertSections.Data.SAMPLE_INTERNAL_ID)
@@ -80,7 +80,9 @@ def update_override_cycles(self, run_parameters: RunParameters) -> None:
8080
)
8181
self.override_cycles = read1_cycles + index1_cycles + index2_cycles + read2_cycles
8282

83-
def _update_barcode_mismatches_1(self, samples_to_compare: list["FlowCellSample"]) -> None:
83+
def _update_barcode_mismatches_1(
84+
self, samples_to_compare: list["IlluminaSampleIndexSetting"]
85+
) -> None:
8486
"""Assign zero to barcode_mismatches_1 if the hamming distance between self.index
8587
and the index1 of any sample in the lane is below the minimum threshold."""
8688
for sample in samples_to_compare:
@@ -96,7 +98,7 @@ def _update_barcode_mismatches_1(self, samples_to_compare: list["FlowCellSample"
9698

9799
def _update_barcode_mismatches_2(
98100
self,
99-
samples_to_compare: list["FlowCellSample"],
101+
samples_to_compare: list["IlluminaSampleIndexSetting"],
100102
is_reverse_complement: bool,
101103
) -> None:
102104
"""Assign zero to barcode_mismatches_2 if the hamming distance between self.index2
@@ -130,7 +132,7 @@ def process_indexes(self, run_parameters: RunParameters):
130132

131133
def update_barcode_mismatches(
132134
self,
133-
samples_to_compare: list["FlowCellSample"],
135+
samples_to_compare: list["IlluminaSampleIndexSetting"],
134136
is_run_single_index: bool,
135137
is_reverse_complement: bool,
136138
) -> None:

cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44

55
from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_samples_by_lane
6-
from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample
6+
from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
77
from cg.constants.demultiplexing import IndexSettings, SampleSheetBCLConvertSections
88
from cg.models.demultiplex.run_parameters import RunParameters
99
from cg.models.run_devices.illumina_run_directory_data import IlluminaRunDirectoryData
@@ -12,22 +12,22 @@
1212

1313

1414
class SampleSheetCreator:
15-
"""Base class for sample sheet creation."""
15+
"""Base class for sample sheet creation for an Illumina run."""
1616

1717
def __init__(
1818
self,
19-
flow_cell: IlluminaRunDirectoryData,
20-
lims_samples: list[FlowCellSample],
19+
run_directory_data: IlluminaRunDirectoryData,
20+
samples: list[IlluminaSampleIndexSetting],
2121
):
22-
self.flow_cell: IlluminaRunDirectoryData = flow_cell
23-
self.flow_cell_id: str = flow_cell.id
24-
self.lims_samples: list[FlowCellSample] = lims_samples
25-
self.run_parameters: RunParameters = flow_cell.run_parameters
22+
self.run_directory_data: IlluminaRunDirectoryData = run_directory_data
23+
self.flow_cell_id: str = run_directory_data.id
24+
self.samples: list[IlluminaSampleIndexSetting] = samples
25+
self.run_parameters: RunParameters = run_directory_data.run_parameters
2626
self.index_settings: IndexSettings = self.run_parameters.index_settings
2727

2828
def convert_sample_to_header_dict(
2929
self,
30-
sample: FlowCellSample,
30+
sample: IlluminaSampleIndexSetting,
3131
data_column_names: list[str],
3232
) -> list[str]:
3333
"""Convert a lims sample object to a list that corresponds to the sample sheet headers."""
@@ -49,7 +49,7 @@ def get_additional_sections_sample_sheet(self) -> list[list[str]]:
4949
[
5050
SampleSheetBCLConvertSections.Header.INSTRUMENT_PLATFORM_TITLE.value,
5151
SampleSheetBCLConvertSections.Header.instrument_platform_sequencer().get(
52-
self.flow_cell.sequencer_type
52+
self.run_directory_data.sequencer_type
5353
),
5454
],
5555
SampleSheetBCLConvertSections.Header.index_orientation_forward(),
@@ -102,7 +102,7 @@ def create_sample_sheet_content(self) -> list[list[str]]:
102102
sample_sheet_content: list[list[str]] = (
103103
self.get_additional_sections_sample_sheet() + complete_data_section
104104
)
105-
for sample in self.lims_samples:
105+
for sample in self.samples:
106106
sample_sheet_content.append(
107107
self.convert_sample_to_header_dict(
108108
sample=sample,
@@ -113,15 +113,15 @@ def create_sample_sheet_content(self) -> list[list[str]]:
113113

114114
def process_samples_for_sample_sheet(self) -> None:
115115
"""Remove unwanted samples and adapt remaining samples."""
116-
for lims_sample in self.lims_samples:
117-
lims_sample.process_indexes(run_parameters=self.run_parameters)
116+
for sample in self.samples:
117+
sample.process_indexes(run_parameters=self.run_parameters)
118118
is_reverse_complement: bool = (
119119
self.index_settings.are_i5_override_cycles_reverse_complemented
120120
)
121-
for lane, samples_in_lane in get_samples_by_lane(self.lims_samples).items():
121+
for lane, samples_in_lane in get_samples_by_lane(self.samples).items():
122122
LOG.info(f"Updating barcode mismatch values for samples in lane {lane}")
123-
for lims_sample in samples_in_lane:
124-
lims_sample.update_barcode_mismatches(
123+
for sample in samples_in_lane:
124+
sample.update_barcode_mismatches(
125125
samples_to_compare=samples_in_lane,
126126
is_run_single_index=self.run_parameters.is_single_index,
127127
is_reverse_complement=is_reverse_complement,

cg/apps/demultiplex/sample_sheet/sample_sheet_models.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,25 @@
33

44
from pydantic import BaseModel
55

6-
from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample
6+
from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
77

88
LOG = logging.getLogger(__name__)
99

1010

1111
class SampleSheet(BaseModel):
12-
samples: list[FlowCellSample]
12+
samples: list[IlluminaSampleIndexSetting]
1313

1414
def get_non_pooled_lanes_and_samples(self) -> list[tuple[int, str]]:
1515
"""Return tuples of non-pooled lane and sample ids."""
1616
non_pooled_lane_sample_id_pairs: list[tuple[int, str]] = []
17-
non_pooled_samples: list[FlowCellSample] = self.get_non_pooled_samples()
17+
non_pooled_samples: list[IlluminaSampleIndexSetting] = self.get_non_pooled_samples()
1818
for sample in non_pooled_samples:
1919
non_pooled_lane_sample_id_pairs.append((sample.lane, sample.sample_id))
2020
return non_pooled_lane_sample_id_pairs
2121

22-
def get_non_pooled_samples(self) -> list[FlowCellSample]:
22+
def get_non_pooled_samples(self) -> list[IlluminaSampleIndexSetting]:
2323
"""Return samples that are sequenced solo in their lane."""
24-
lane_samples: dict[int, list[FlowCellSample]] = defaultdict(list)
24+
lane_samples: dict[int, list[IlluminaSampleIndexSetting]] = defaultdict(list)
2525
for sample in self.samples:
2626
lane_samples[sample.lane].append(sample)
2727
return [samples[0] for samples in lane_samples.values() if len(samples) == 1]

cg/apps/demultiplex/sample_sheet/sample_sheet_validator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88
from cg.apps.demultiplex.sample_sheet.override_cycles_validator import OverrideCyclesValidator
99
from cg.apps.demultiplex.sample_sheet.read_sample_sheet import (
10-
get_flow_cell_samples_from_content,
10+
get_samples_from_content,
1111
get_raw_samples_from_content,
1212
validate_samples_unique_per_lane,
1313
)
14-
from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample
14+
from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
1515
from cg.apps.demultiplex.sample_sheet.sample_sheet_models import SampleSheet
1616
from cg.constants.constants import FileFormat
1717
from cg.constants.demultiplexing import NAME_TO_INDEX_SETTINGS, SampleSheetBCLConvertSections
@@ -115,7 +115,7 @@ def _validate_samples(self) -> None:
115115
"""
116116
LOG.debug("Validating samples")
117117
try:
118-
validated_samples: list[FlowCellSample] = get_flow_cell_samples_from_content(
118+
validated_samples: list[IlluminaSampleIndexSetting] = get_samples_from_content(
119119
sample_sheet_content=self.content
120120
)
121121
except ValidationError as error:
@@ -181,5 +181,5 @@ def get_sample_sheet_object_from_file(self, file_path: Path) -> SampleSheet:
181181
SampleSheetError: If the sample sheet is not valid.
182182
"""
183183
self.validate_sample_sheet_from_file(file_path)
184-
samples: list[FlowCellSample] = get_flow_cell_samples_from_content(self.content)
184+
samples: list[IlluminaSampleIndexSetting] = get_samples_from_content(self.content)
185185
return SampleSheet(samples=samples)

0 commit comments

Comments
 (0)