Commit b5cb6ca

Renamed workflow fastq to raw-data (#3708)(minor)
## Description

Closes Clinical-Genomics/add-new-tech#93; redo of #3676, which ran into conflicts. Long-read sequencing devices output BAM as their raw data format, rather than the FASTQ produced by Illumina instruments. To standardise the ordering of sequencing data without analysis, the `fastq` workflow is renamed to `raw-data`.
1 parent c2b0733 commit b5cb6ca

File tree: 46 files changed (+302, -131 lines)

New file: Alembic migration "rename_fastq_to_raw_data" (revision 18dbadd8c436)

Lines changed: 169 additions & 0 deletions

@@ -0,0 +1,169 @@
"""rename_fastq_to_raw_data
Revision ID: 18dbadd8c436
Revises: bb4c6dbad991
Create Date: 2024-09-11 13:15:11.876822
"""

from enum import StrEnum

from sqlalchemy import orm
from sqlalchemy.dialects import mysql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

from alembic import op

# revision identifiers, used by Alembic.
revision = "18dbadd8c436"
down_revision = "7770dcad8bde"
branch_labels = None
depends_on = None


base_options = (
    "balsamic",
    "balsamic-pon",
    "balsamic-qc",
    "balsamic-umi",
    "demultiplex",
    "fluffy",
    "jasen",
    "microsalt",
    "mip-dna",
    "mip-rna",
    "mutant",
    "raredisease",
    "rnafusion",
    "rsync",
    "spring",
    "taxprofiler",
    "tomte",
)
old_options = sorted(base_options + ("fastq",))
new_options = sorted(base_options + ("raw-data",))

old_enum = mysql.ENUM(*old_options)
new_enum = mysql.ENUM(*new_options)


class Workflow(StrEnum):
    BALSAMIC: str = "balsamic"
    BALSAMIC_PON: str = "balsamic-pon"
    BALSAMIC_QC: str = "balsamic-qc"
    BALSAMIC_UMI: str = "balsamic-umi"
    DEMULTIPLEX: str = "demultiplex"
    FLUFFY: str = "fluffy"
    JASEN: str = "jasen"
    MICROSALT: str = "microsalt"
    MIP_DNA: str = "mip-dna"
    MIP_RNA: str = "mip-rna"
    MUTANT: str = "mutant"
    RAREDISEASE: str = "raredisease"
    RAW_DATA: str = "raw-data"
    RNAFUSION: str = "rnafusion"
    RSYNC: str = "rsync"
    SPRING: str = "spring"
    TAXPROFILER: str = "taxprofiler"
    TOMTE: str = "tomte"


class Base(DeclarativeBase):
    pass


class Case(Base):
    __tablename__ = "case"
    id: Mapped[int] = mapped_column(primary_key=True)
    data_analysis: Mapped[str | None]


class Analysis(Base):
    __tablename__ = "analysis"

    id: Mapped[int] = mapped_column(primary_key=True)
    workflow: Mapped[str | None]


class Order(Base):
    __tablename__ = "order"
    id: Mapped[int] = mapped_column(primary_key=True)
    workflow: Mapped[str | None]


class ApplicationLimitations(Base):
    __tablename__ = "application_limitations"
    id: Mapped[int] = mapped_column(primary_key=True)
    workflow: Mapped[str | None]


def upgrade():
    bind = op.get_bind()
    session = orm.Session(bind=bind)
    try:
        # Case
        op.alter_column("case", "data_analysis", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Case).filter(Case.data_analysis == "fastq").update(
            {"data_analysis": Workflow.RAW_DATA}, synchronize_session="evaluate"
        )
        op.alter_column("case", "data_analysis", type_=new_enum, existing_nullable=True)
        # Analysis
        op.alter_column("analysis", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Analysis).filter(Analysis.workflow == "fastq").update(
            {"workflow": Workflow.RAW_DATA}, synchronize_session="evaluate"
        )
        op.alter_column("analysis", "workflow", type_=new_enum, existing_nullable=True)
        # Order
        op.alter_column("order", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Order).filter(Order.workflow == "fastq").update(
            {"workflow": Workflow.RAW_DATA}, synchronize_session="evaluate"
        )
        op.alter_column("order", "workflow", type_=new_enum, existing_nullable=True)
        # Application Limitation
        op.alter_column(
            "application_limitations", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True
        )
        session.query(ApplicationLimitations).filter(
            ApplicationLimitations.workflow == "fastq"
        ).update({"workflow": Workflow.RAW_DATA}, synchronize_session="evaluate")
        op.alter_column(
            "application_limitations", "workflow", type_=new_enum, existing_nullable=True
        )
        session.commit()
    finally:
        session.close()


def downgrade():
    bind = op.get_bind()
    session = orm.Session(bind=bind)
    try:
        # Case
        op.alter_column("case", "data_analysis", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Case).filter(Case.data_analysis == Workflow.RAW_DATA).update(
            {"data_analysis": "fastq"}, synchronize_session="evaluate"
        )
        op.alter_column("case", "data_analysis", type_=old_enum, existing_nullable=True)
        # Analysis
        op.alter_column("analysis", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Analysis).filter(Analysis.workflow == Workflow.RAW_DATA).update(
            {"workflow": "fastq"}, synchronize_session="evaluate"
        )
        op.alter_column("analysis", "workflow", type_=old_enum, existing_nullable=True)
        # Order
        op.alter_column("order", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True)
        session.query(Order).filter(Order.workflow == Workflow.RAW_DATA).update(
            {"workflow": "fastq"}, synchronize_session="evaluate"
        )
        op.alter_column("order", "workflow", type_=old_enum, existing_nullable=True)
        # Application Limitation
        op.alter_column(
            "application_limitations", "workflow", type_=mysql.VARCHAR(64), existing_nullable=True
        )
        session.query(ApplicationLimitations).filter(
            ApplicationLimitations.workflow == Workflow.RAW_DATA
        ).update({"workflow": "fastq"}, synchronize_session="evaluate")
        op.alter_column(
            "application_limitations", "workflow", type_=old_enum, existing_nullable=True
        )
        session.commit()
    finally:
        session.close()
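Editor's note: the migration widens each affected column to VARCHAR, rewrites the rows, and then re-applies the ENUM with the new value. As a quick post-migration sanity check (not part of this commit), one can count rows that still hold the old `fastq` value after `alembic upgrade head`. The sketch below assumes direct access to status-db; the connection URL is a placeholder.

```python
# Hypothetical post-migration check, not part of this commit.
from sqlalchemy import create_engine, text

# Placeholder URL: point this at the real status-db instance.
engine = create_engine("mysql+pymysql://user:password@localhost/cg")

CHECKS = (
    ("case", "data_analysis"),
    ("analysis", "workflow"),
    ("order", "workflow"),
    ("application_limitations", "workflow"),
)

with engine.connect() as connection:
    for table, column in CHECKS:
        remaining = connection.execute(
            text(f"SELECT COUNT(*) FROM `{table}` WHERE `{column}` = 'fastq'")
        ).scalar()
        # Every count should be 0 once the upgrade has run.
        print(f"{table}.{column}: {remaining} rows still set to 'fastq'")
```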

cg/cli/deliver/base.py

Lines changed: 5 additions & 5 deletions
@@ -11,17 +11,17 @@
 from cg.constants import Workflow
 from cg.constants.cli_options import DRY_RUN
 from cg.constants.delivery import FileDeliveryOption
-from cg.services.deliver_files.delivery_rsync_service.delivery_rsync_service import (
-    DeliveryRsyncService,
-)
 from cg.models.cg_config import CGConfig
 from cg.services.deliver_files.deliver_files_service.deliver_files_service import (
     DeliverFilesService,
 )
 from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import (
     DeliveryServiceFactory,
 )
-from cg.store.models import Case, Analysis
+from cg.services.deliver_files.delivery_rsync_service.delivery_rsync_service import (
+    DeliveryRsyncService,
+)
+from cg.store.models import Analysis, Case

 LOG = logging.getLogger(__name__)

@@ -148,7 +148,7 @@ def deliver_auto_raw_data(context: CGConfig, dry_run: bool):
     clinical-delivery."""
     service_builder: DeliveryServiceFactory = context.delivery_service_factory
     analyses: list[Analysis] = context.analysis_service.get_analyses_to_upload_for_workflow(
-        workflow=Workflow.FASTQ
+        workflow=Workflow.RAW_DATA
     )
     deliver_raw_data_for_analyses(
         analyses=analyses,

cg/cli/deliver/utils.py

Lines changed: 2 additions & 3 deletions
@@ -9,10 +9,9 @@
 from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import (
     DeliveryServiceFactory,
 )
-from cg.store.models import Case, Analysis
+from cg.store.models import Analysis, Case
 from cg.store.store import Store

-
 LOG = logging.getLogger(__name__)


@@ -29,7 +28,7 @@ def deliver_raw_data_for_analyses(
         case: Case = analysis.case
         delivery_service: DeliverFilesService = service_builder.build_delivery_service(
             delivery_type=case.data_delivery,
-            workflow=Workflow.FASTQ,
+            workflow=Workflow.RAW_DATA,
         )

         delivery_service.deliver_files_for_case(

cg/cli/workflow/fastq/base.py

Lines changed: 1 addition & 1 deletion
@@ -44,6 +44,6 @@ def store_fastq_analysis(context: click.Context, case_id: str, dry_run: bool = False):
 def store_available_fastq_analysis(context: click.Context, dry_run: bool = False):
     """Creates an analysis object in status-db for all fastq cases to be delivered."""
     status_db: Store = context.obj.status_db
-    for case in status_db.cases_to_analyse(workflow=Workflow.FASTQ):
+    for case in status_db.cases_to_analyse(workflow=Workflow.RAW_DATA):
         if SequencingQCService.case_pass_sequencing_qc(case):
             context.invoke(store_fastq_analysis, case_id=case.internal_id, dry_run=dry_run)

cg/cli/workflow/fastq/fastq_service.py

Lines changed: 2 additions & 2 deletions
@@ -25,7 +25,7 @@ def _get_case(self, case_id: str) -> Case:

     def _add_analysis_to_store(self, case: Case) -> None:
         new_analysis: Analysis = self.store.add_analysis(
-            workflow=Workflow.FASTQ,
+            workflow=Workflow.RAW_DATA,
             completed_at=dt.datetime.now(),
             primary=True,
             started_at=dt.datetime.now(),
@@ -42,7 +42,7 @@ def _add_analysis_to_trailblazer(self, case: Case) -> None:
             order_id=case.latest_order.id,
             out_dir="",
             slurm_quality_of_service=case.slurm_priority,
-            workflow=Workflow.FASTQ,
+            workflow=Workflow.RAW_DATA,
             ticket=case.latest_ticket,
         )
         self.trailblazer_api.set_analysis_status(

cg/constants/constants.py

Lines changed: 1 addition & 1 deletion
@@ -135,14 +135,14 @@ class Workflow(StrEnum):
     BALSAMIC_QC: str = "balsamic-qc"
     BALSAMIC_UMI: str = "balsamic-umi"
     DEMULTIPLEX: str = "demultiplex"
-    FASTQ: str = "fastq"
     FLUFFY: str = "fluffy"
     JASEN: str = "jasen"
     MICROSALT: str = "microsalt"
     MIP_DNA: str = "mip-dna"
     MIP_RNA: str = "mip-rna"
     MUTANT: str = "mutant"
     RAREDISEASE: str = "raredisease"
+    RAW_DATA: str = "raw-data"
     RNAFUSION: str = "rnafusion"
     RSYNC: str = "rsync"
     SPRING: str = "spring"
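Editor's note: since `Workflow` is a `StrEnum`, the renamed member behaves as a plain string, which is why both the ORM updates in the migration above and string comparisons elsewhere keep working. A minimal illustration, trimmed to the relevant member rather than the full enum:

```python
from enum import StrEnum  # Python 3.11+


class Workflow(StrEnum):
    RAW_DATA: str = "raw-data"


assert Workflow.RAW_DATA == "raw-data"
assert f"{Workflow.RAW_DATA}" == "raw-data"  # str() of a StrEnum member is its value
```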

cg/constants/delivery.py

Lines changed: 7 additions & 11 deletions
@@ -3,22 +3,18 @@
 from enum import StrEnum

 from cg.constants.constants import Workflow
-from cg.constants.housekeeper_tags import (
-    AlignmentFileTag,
-    AnalysisTag,
-    HermesFileTag,
-)
+from cg.constants.housekeeper_tags import AlignmentFileTag, AnalysisTag, HermesFileTag

 ONLY_ONE_CASE_PER_TICKET: list[Workflow] = [
-    Workflow.FASTQ,
     Workflow.MICROSALT,
     Workflow.MUTANT,
+    Workflow.RAW_DATA,
 ]

 SKIP_MISSING: list[Workflow] = [
-    Workflow.FASTQ,
     Workflow.MICROSALT,
     Workflow.MUTANT,
+    Workflow.RAW_DATA,
 ]

 BALSAMIC_ANALYSIS_CASE_TAGS: list[set[str]] = [
@@ -178,10 +174,6 @@
         "case_tags": MICROSALT_ANALYSIS_CASE_TAGS,
         "sample_tags": MICROSALT_ANALYSIS_SAMPLE_TAGS,
     },
-    Workflow.FASTQ: {
-        "case_tags": RAW_DATA_ANALYSIS_CASE_TAGS,
-        "sample_tags": RAW_DATA_ANALYSIS_SAMPLE_TAGS,
-    },
     Workflow.MUTANT: {
         "case_tags": MUTANT_ANALYSIS_CASE_TAGS,
         "sample_tags": MUTANT_ANALYSIS_SAMPLE_TAGS,
@@ -190,6 +182,10 @@
         "case_tags": CLINICAL_DELIVERY_TAGS,
         "sample_tags": CLINICAL_DELIVERY_TAGS,
     },
+    Workflow.RAW_DATA: {
+        "case_tags": RAW_DATA_ANALYSIS_CASE_TAGS,
+        "sample_tags": RAW_DATA_ANALYSIS_SAMPLE_TAGS,
+    },
     Workflow.RNAFUSION: {
         "case_tags": CLINICAL_DELIVERY_TAGS,
         "sample_tags": CLINICAL_DELIVERY_TAGS,

cg/constants/housekeeper_tags.py

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,6 @@ class BalsamicProtectedTags:
     Workflow.BALSAMIC_QC: BalsamicProtectedTags.QC,
     Workflow.BALSAMIC_PON: [],
     Workflow.BALSAMIC_UMI: BalsamicProtectedTags.QC + BalsamicProtectedTags.VARIANT_CALLERS,
-    Workflow.FASTQ: [],
     Workflow.FLUFFY: ["NIPT_csv", "MultiQC"],
     Workflow.MICROSALT: [
         ["microsalt-log"],
@@ -205,6 +204,7 @@ class BalsamicProtectedTags:
     Workflow.RAREDISEASE: [
         [HermesFileTag.LONG_TERM_STORAGE],
     ],
+    Workflow.RAW_DATA: [],
     Workflow.RNAFUSION: [
         [HermesFileTag.LONG_TERM_STORAGE],
         [AnalysisTag.FUSION, AnalysisTag.ARRIBA],  # legacy

cg/meta/delivery/delivery.py

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ def get_fastq_delivery_files_by_sample(
     ) -> list[DeliveryFile]:
         """Return a list of FASTQ files to be delivered for a specific sample."""
         delivery_files: list[DeliveryFile] = []
-        fastq_tags: list[set[str]] = self.get_analysis_sample_tags_for_workflow(Workflow.FASTQ)
+        fastq_tags: list[set[str]] = self.get_analysis_sample_tags_for_workflow(Workflow.RAW_DATA)
         if not self.is_sample_deliverable(sample=sample, force=force):
             LOG.warning(f"Sample {sample.internal_id} is not deliverable")
             return delivery_files

cg/meta/transfer/external_data.py

Lines changed: 6 additions & 4 deletions
@@ -8,13 +8,13 @@
 from cg.constants import HK_FASTQ_TAGS, FileExtensions
 from cg.constants.constants import CaseActions
 from cg.meta.meta import MetaAPI
+from cg.meta.transfer.utils import are_all_fastq_valid
+from cg.models.cg_config import CGConfig
+from cg.models.slurm.sbatch import Sbatch
 from cg.services.deliver_files.delivery_rsync_service.sbatch import (
     ERROR_RSYNC_FUNCTION,
     RSYNC_CONTENTS_COMMAND,
 )
-from cg.meta.transfer.utils import are_all_fastq_valid
-from cg.models.cg_config import CGConfig
-from cg.models.slurm.sbatch import Sbatch
 from cg.store.models import Case, Customer, Sample
 from cg.utils.files import get_files_matching_pattern

@@ -144,7 +144,9 @@ def _get_fastq_paths_to_add(self, sample_id: str) -> list[Path]:
         LOG.debug(f"Checking fastq files in {sample_folder}")
         file_paths: list[Path] = [
             sample_folder.joinpath(path)
-            for path in get_files_matching_pattern(directory=sample_folder, pattern="*.fastq.gz")
+            for path in get_files_matching_pattern(
+                directory=sample_folder, pattern=f"*{FileExtensions.FASTQ_GZ}"
+            )
         ]
         LOG.debug(f"Found {len(file_paths)} fastq files for sample {sample_id}")
         hk_version: Version = self.housekeeper_api.get_or_create_version(bundle_name=sample_id)
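Editor's note: the glob is now built from the `FileExtensions` constant instead of a hard-coded literal. Assuming `FileExtensions.FASTQ_GZ` is `".fastq.gz"` (an assumption, not shown in this diff), the resulting pattern is unchanged:

```python
class FileExtensions:
    FASTQ_GZ = ".fastq.gz"  # assumed value, for illustration only


pattern = f"*{FileExtensions.FASTQ_GZ}"
assert pattern == "*.fastq.gz"  # same glob as the previous hard-coded string
```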
