|
9 | 9 | from housekeeper.store.models import Bundle, Version
|
10 | 10 |
|
11 | 11 | from cg.apps.environ import environ_email
|
12 |
| -from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority |
| 12 | +from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority, SequencingFileTag |
13 | 13 | from cg.constants.constants import (
|
14 | 14 | AnalysisType,
|
15 | 15 | CaseActions,
|
|
24 | 24 | from cg.meta.workflow.fastq import FastqHandler
|
25 | 25 | from cg.models.analysis import AnalysisModel
|
26 | 26 | from cg.models.cg_config import CGConfig
|
| 27 | +from cg.models.fastq import FastqFileMeta |
27 | 28 | from cg.store.models import Analysis, BedVersion, Case, CaseSample, Sample
|
28 | 29 |
|
29 | 30 | LOG = logging.getLogger(__name__)
|
@@ -288,58 +289,59 @@ def get_cases_to_qc(self) -> list[Case]:
|
288 | 289 | if self.trailblazer_api.is_latest_analysis_qc(case_id=case.internal_id)
|
289 | 290 | ]
|
290 | 291 |
|
291 |
def get_sample_fastq_destination_dir(self, case: Case, sample: Sample) -> Path:
    """Return the directory in which the FASTQ files of a sample are to be placed.

    This implementation provides no directory and always raises; presumably the
    concrete workflow classes override it -- confirm against the subclasses.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
|
294 | 295 |
|
295 |
def gather_file_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]:
    """Return the parsed metadata of every FASTQ file stored for a sample.

    The files are requested from the Housekeeper API with the sample's internal id
    as bundle and the FASTQ tag; the full path of each returned file is then parsed
    by the FASTQ handler.
    """
    hk_fastq_files = self.housekeeper_api.files(
        bundle=sample.internal_id, tags={SequencingFileTag.FASTQ}
    )
    return [self.fastq_handler.parse_file_data(hk_file.full_path) for hk_file in hk_fastq_files]
|
302 | 303 |
|
def link_fastq_files_for_sample(
    self, case: Case, sample: Sample, concatenate: bool = False
) -> None:
    """
    Link FASTQ files for a sample to the work directory.

    Every FASTQ file gathered for the sample is symlinked into the sample's FASTQ
    destination directory (created if missing) under the name built by the FASTQ
    handler. A destination that already exists is left untouched and a warning is logged.

    If pipeline input requires concatenated FASTQ files, pass concatenate=True: the linked
    files of each read direction are concatenated into a single file and the individual
    links are removed afterwards. Read directions without any linked file are skipped.

    Args:
        case: case the sample is analysed in; used to resolve the destination directory.
        sample: sample whose FASTQ files are linked.
        concatenate: whether to concatenate the linked files per read direction.
    """
    # Both dictionaries are keyed by read direction (1 or 2).
    linked_reads_paths: dict[int, list[Path]] = {1: [], 2: []}
    concatenated_paths: dict[int, str] = {1: "", 2: ""}
    fastq_files_meta: list[FastqFileMeta] = self.gather_file_metadata_for_sample(sample=sample)
    # Sorting by source path keeps the link (and later concatenation) order deterministic.
    sorted_fastq_files_meta: list[FastqFileMeta] = sorted(
        fastq_files_meta, key=lambda file_meta: file_meta.path
    )
    fastq_dir: Path = self.get_sample_fastq_destination_dir(case=case, sample=sample)
    fastq_dir.mkdir(parents=True, exist_ok=True)

    for fastq_file in sorted_fastq_files_meta:
        fastq_file_name: str = self.fastq_handler.create_fastq_name(
            lane=fastq_file.lane,
            flow_cell=fastq_file.flow_cell_id,
            sample=sample.internal_id,
            read_direction=fastq_file.read_direction,
            undetermined=fastq_file.undetermined,
            meta=self.get_lims_naming_metadata(sample),
        )
        destination_path = Path(fastq_dir, fastq_file_name)
        linked_reads_paths[fastq_file.read_direction].append(destination_path)
        # Overwritten on every file: the name derived from the last file of a read
        # direction is the one used for the concatenated output.
        concatenated_paths[
            fastq_file.read_direction
        ] = f"{fastq_dir}/{self.fastq_handler.get_concatenated_name(fastq_file_name)}"

        if not destination_path.exists():
            LOG.info(f"Linking: {fastq_file.path} -> {destination_path}")
            destination_path.symlink_to(fastq_file.path)
        else:
            LOG.warning(f"Destination path already exists: {destination_path}")

    if not concatenate:
        return

    LOG.info(f"Concatenation in progress for sample: {sample.internal_id}")
    for read_direction, linked_paths in linked_reads_paths.items():
        if not linked_paths:
            # No file was linked for this direction (presumably read 2 of single-end
            # data), so its concatenated path is still the empty placeholder and there
            # is nothing to concatenate or remove.
            LOG.debug(f"No FASTQ files linked for read direction: {read_direction}")
            continue
        self.fastq_handler.concatenate(linked_paths, concatenated_paths[read_direction])
        self.fastq_handler.remove_files(linked_paths)
|
@@ -435,7 +437,7 @@ def get_date_from_file_path(file_path: Path) -> dt.datetime.date:
|
435 | 437 | """
|
436 | 438 | return dt.datetime.fromtimestamp(int(os.path.getctime(file_path)))
|
437 | 439 |
|
438 |
def get_lims_naming_metadata(self, sample: Sample) -> str | None:
    """Return additional metadata to use when naming the FASTQ files of a sample.

    This implementation has no such metadata and always returns None.
    """
    return None
|
440 | 442 |
|
441 | 443 | def get_latest_metadata(self, case_id: str) -> AnalysisModel:
|
|
0 commit comments