Skip to content

Commit

Permalink
Merge branch 'master' into archiving-refactor-ddn
Browse files Browse the repository at this point in the history
  • Loading branch information
islean authored Dec 14, 2023
2 parents 042a829 + 2627438 commit f8c5242
Show file tree
Hide file tree
Showing 26 changed files with 375 additions and 187 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 54.4.1
current_version = 54.4.4
commit = True
tag = True
tag_name = v{new_version}
Expand Down
2 changes: 1 addition & 1 deletion cg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__title__ = "cg"
__version__ = "54.4.1"
__version__ = "54.4.4"
8 changes: 8 additions & 0 deletions cg/io/gzip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import gzip
from pathlib import Path


def read_gzip_first_line(file_path: Path) -> str:
    """Return the first line of a gzip file.

    The file is opened in binary mode, a single line is read and the bytes are
    decoded with the default codec of ``bytes.decode`` (UTF-8). The line
    terminator, when present, is kept in the returned string. An empty file
    yields an empty string.
    """
    with gzip.open(file_path) as file:
        return file.readline().decode()
69 changes: 36 additions & 33 deletions cg/meta/workflow/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from housekeeper.store.models import Bundle, Version

from cg.apps.environ import environ_email
from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority
from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority, SequencingFileTag
from cg.constants.constants import (
AnalysisType,
CaseActions,
Expand All @@ -24,18 +24,20 @@
from cg.meta.workflow.fastq import FastqHandler
from cg.models.analysis import AnalysisModel
from cg.models.cg_config import CGConfig
from cg.models.fastq import FastqFileMeta
from cg.store.models import Analysis, BedVersion, Case, CaseSample, Sample

LOG = logging.getLogger(__name__)


def add_gene_panel_combo(default_panels: set[str]) -> set[str]:
    """Add gene panels combinations for gene panels being part of gene panel combination and return updated gene panels.

    For every panel in ``default_panels`` that is a key of
    ``GenePanelCombo.COMBO_1``, the panels stored under that key are added.
    The passed-in set is updated in place and is also the object returned.
    """
    additional_panels = set()
    for panel in default_panels:
        if panel in GenePanelCombo.COMBO_1:
            additional_panels |= GenePanelCombo.COMBO_1.get(panel)
    # Merge only after the loop has finished: growing a set while iterating
    # over it raises "RuntimeError: Set changed size during iteration".
    default_panels |= additional_panels
    return default_panels


class AnalysisAPI(MetaAPI):
Expand Down Expand Up @@ -288,58 +290,59 @@ def get_cases_to_qc(self) -> list[Case]:
if self.trailblazer_api.is_latest_analysis_qc(case_id=case.internal_id)
]

def get_sample_fastq_destination_dir(self, case: Case, sample: Sample) -> Path:
    """Return the path to the FASTQ destination directory.

    This implementation provides no directory and always raises
    NotImplementedError.

    Raises:
        NotImplementedError: on every call.
    """
    raise NotImplementedError

def gather_file_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]:
    """Return the parsed FASTQ metadata for every FASTQ file of a sample.

    Files are fetched from Housekeeper using the sample internal id as bundle
    name and the FASTQ sequencing file tag; the full path of each file is
    handed to the FASTQ handler for parsing.
    """
    return [
        self.fastq_handler.parse_file_data(hk_file.full_path)
        for hk_file in self.housekeeper_api.files(
            bundle=sample.internal_id, tags={SequencingFileTag.FASTQ}
        )
    ]

def link_fastq_files_for_sample(
self, case_obj: Case, sample_obj: Sample, concatenate: bool = False
self, case: Case, sample: Sample, concatenate: bool = False
) -> None:
"""
Link FASTQ files for a sample to working directory.
Link FASTQ files for a sample to the work directory.
If pipeline input requires concatenated fastq, files can also be concatenated
"""
linked_reads_paths = {1: [], 2: []}
concatenated_paths = {1: "", 2: ""}
files: list[dict] = self.gather_file_metadata_for_sample(sample_obj=sample_obj)
sorted_files = sorted(files, key=lambda k: k["path"])
fastq_dir = self.get_sample_fastq_destination_dir(case=case_obj, sample=sample_obj)
linked_reads_paths: dict[int, list[Path]] = {1: [], 2: []}
concatenated_paths: dict[int, str] = {1: "", 2: ""}
fastq_files_meta: list[FastqFileMeta] = self.gather_file_metadata_for_sample(sample=sample)
sorted_fastq_files_meta: list[FastqFileMeta] = sorted(
fastq_files_meta, key=lambda k: k.path
)
fastq_dir: Path = self.get_sample_fastq_destination_dir(case=case, sample=sample)
fastq_dir.mkdir(parents=True, exist_ok=True)

for fastq_data in sorted_files:
fastq_path = Path(fastq_data["path"])
fastq_name = self.fastq_handler.create_fastq_name(
lane=fastq_data["lane"],
flowcell=fastq_data["flowcell"],
sample=sample_obj.internal_id,
read=fastq_data["read"],
undetermined=fastq_data["undetermined"],
meta=self.get_additional_naming_metadata(sample_obj),
for fastq_file in sorted_fastq_files_meta:
fastq_file_name: str = self.fastq_handler.create_fastq_name(
lane=fastq_file.lane,
flow_cell=fastq_file.flow_cell_id,
sample=sample.internal_id,
read_direction=fastq_file.read_direction,
undetermined=fastq_file.undetermined,
meta=self.get_lims_naming_metadata(sample),
)
destination_path: Path = fastq_dir / fastq_name
linked_reads_paths[fastq_data["read"]].append(destination_path)
destination_path = Path(fastq_dir, fastq_file_name)
linked_reads_paths[fastq_file.read_direction].append(destination_path)
concatenated_paths[
fastq_data["read"]
] = f"{fastq_dir}/{self.fastq_handler.get_concatenated_name(fastq_name)}"
fastq_file.read_direction
] = f"{fastq_dir}/{self.fastq_handler.get_concatenated_name(fastq_file_name)}"

if not destination_path.exists():
LOG.info(f"Linking: {fastq_path} -> {destination_path}")
destination_path.symlink_to(fastq_path)
LOG.info(f"Linking: {fastq_file.path} -> {destination_path}")
destination_path.symlink_to(fastq_file.path)
else:
LOG.warning(f"Destination path already exists: {destination_path}")

if not concatenate:
return

LOG.info("Concatenation in progress for sample %s.", sample_obj.internal_id)
LOG.info(f"Concatenation in progress for sample: {sample.internal_id}")
for read, value in linked_reads_paths.items():
self.fastq_handler.concatenate(linked_reads_paths[read], concatenated_paths[read])
self.fastq_handler.remove_files(value)
Expand Down Expand Up @@ -435,7 +438,7 @@ def get_date_from_file_path(file_path: Path) -> dt.datetime.date:
"""
return dt.datetime.fromtimestamp(int(os.path.getctime(file_path)))

def get_lims_naming_metadata(self, sample: Sample) -> str | None:
    """Return extra metadata to use when naming the FASTQ files of a sample.

    This implementation ignores the sample and always returns None.
    """
    return None

def get_latest_metadata(self, case_id: str) -> AnalysisModel:
Expand Down
23 changes: 12 additions & 11 deletions cg/meta/workflow/balsamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
BalsamicWGSQCMetrics,
)
from cg.models.cg_config import CGConfig
from cg.models.fastq import FastqFileMeta
from cg.store.models import Case, CaseSample, Sample
from cg.utils import Process
from cg.utils.utils import build_command_from_dict, get_string_from_list_by_pattern
Expand Down Expand Up @@ -146,22 +147,22 @@ def get_sample_fastq_destination_dir(self, case: Case, sample: Sample = None) ->
return Path(self.get_case_path(case.internal_id), FileFormat.FASTQ)

def link_fastq_files(self, case_id: str, dry_run: bool = False) -> None:
    """Link the FASTQ files of every sample in a case, requesting concatenation.

    The case is fetched from the status database by its internal id and each
    of its sample links is passed on to ``link_fastq_files_for_sample`` with
    ``concatenate=True``. ``dry_run`` is accepted but not used by this method.
    """
    case = self.status_db.get_case_by_internal_id(internal_id=case_id)
    for link in case.links:
        self.link_fastq_files_for_sample(case=case, sample=link.sample, concatenate=True)

def get_concatenated_fastq_path(self, link_object: CaseSample) -> Path:
"""Returns path to the concatenated FASTQ file of a sample"""
file_collection: list[dict] = self.gather_file_metadata_for_sample(link_object.sample)
"""Returns the path to the concatenated FASTQ file of a sample"""
file_collection: list[FastqFileMeta] = self.gather_file_metadata_for_sample(
link_object.sample
)
fastq_data = file_collection[0]
linked_fastq_name = self.fastq_handler.create_fastq_name(
lane=fastq_data["lane"],
flowcell=fastq_data["flowcell"],
lane=fastq_data.lane,
flow_cell=fastq_data.flow_cell_id,
sample=link_object.sample.internal_id,
read=fastq_data["read"],
undetermined=fastq_data["undetermined"],
read_direction=fastq_data.read_direction,
undetermined=fastq_data.undetermined,
)
concatenated_fastq_name: str = self.fastq_handler.get_concatenated_name(linked_fastq_name)
return Path(
Expand Down
Loading

0 comments on commit f8c5242

Please sign in to comment.