Skip to content

Commit

Permalink
Merge branch 'master' into refactor-sql-addhandler
Browse files Browse the repository at this point in the history
  • Loading branch information
henrikstranneheim committed Jan 7, 2024
2 parents cce1969 + 887ba49 commit 3f9afe1
Show file tree
Hide file tree
Showing 147 changed files with 9,921 additions and 1,687 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 54.5.3
current_version = 54.10.0
commit = True
tag = True
tag_name = v{new_version}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests_and_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:

- name: Test with pytest & Coveralls
run: |
pytest --cov=cg/
pytest -n auto --cov=cg/
coveralls
env:
COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion cg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__title__ = "cg"
__version__ = "54.5.3"
__version__ = "54.10.0"
25 changes: 15 additions & 10 deletions cg/apps/demultiplex/sample_sheet/create.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging

from cg.apps.demultiplex.sample_sheet.models import FlowCellSample
from cg.apps.demultiplex.sample_sheet.sample_models import (
FlowCellSampleBcl2Fastq,
FlowCellSampleBCLConvert,
)
from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import (
SampleSheetCreator,
SampleSheetCreatorBcl2Fastq,
SampleSheetCreatorBCLConvert,
)
Expand All @@ -14,9 +16,9 @@

def get_sample_sheet_creator(
flow_cell: FlowCellDirectoryData,
lims_samples: list[FlowCellSample],
lims_samples: list[FlowCellSampleBcl2Fastq | FlowCellSampleBCLConvert],
force: bool,
) -> SampleSheetCreator:
) -> SampleSheetCreatorBcl2Fastq | SampleSheetCreatorBCLConvert:
"""Returns an initialised sample sheet creator according to the demultiplexing software."""
if flow_cell.bcl_converter == BclConverter.BCL2FASTQ:
return SampleSheetCreatorBcl2Fastq(
Expand All @@ -27,16 +29,19 @@ def get_sample_sheet_creator(

def create_sample_sheet(
flow_cell: FlowCellDirectoryData,
lims_samples: list[FlowCellSample],
lims_samples: list[FlowCellSampleBcl2Fastq | FlowCellSampleBCLConvert],
force: bool = False,
) -> list[list[str]]:
"""Create a sample sheet for a flow cell."""
sample_sheet_creator: SampleSheetCreator = get_sample_sheet_creator(
flow_cell=flow_cell,
lims_samples=lims_samples,
force=force,
sample_sheet_creator: SampleSheetCreatorBcl2Fastq | SampleSheetCreatorBCLConvert = (
get_sample_sheet_creator(
flow_cell=flow_cell,
lims_samples=lims_samples,
force=force,
)
)
LOG.info(
f"Constructing a {flow_cell.bcl_converter} sample sheet for the {flow_cell.sequencer_type} flow cell {flow_cell.id}"
f"Constructing a {flow_cell.bcl_converter} sample sheet "
f"for the {flow_cell.sequencer_type} flow cell {flow_cell.id}"
)
return sample_sheet_creator.construct_sample_sheet()
105 changes: 8 additions & 97 deletions cg/apps/demultiplex/sample_sheet/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,8 @@

from pydantic import BaseModel

from cg.apps.demultiplex.sample_sheet.models import (
FlowCellSample,
FlowCellSampleBcl2Fastq,
FlowCellSampleBCLConvert,
)
from cg.constants.constants import FileFormat
from cg.constants.sequencing import Sequencers
from cg.constants.symbols import DASH
from cg.io.controller import ReadFile
from cg.resources import VALID_INDEXES_PATH
from cg.utils.utils import get_hamming_distance
Expand All @@ -25,7 +20,7 @@

def is_dual_index(index: str) -> bool:
"""Determines if an index in the raw sample sheet is dual index or not."""
return "-" in index
return DASH in index


class Index(BaseModel):
Expand All @@ -51,20 +46,14 @@ def get_valid_indexes(dual_indexes_only: bool = True) -> list[Index]:
return indexes


def get_index_pair(sample: FlowCellSample) -> tuple[str, str]:
    """Return the sample's index as an (index 1, index 2) pair.

    When the sample index is a dual index (as judged by is_dual_index), the raw string is
    split on the dash and both halves are stripped of surrounding whitespace. Otherwise the
    sample's own index and index2 attributes are returned. In both cases every occurrence
    of the substring "NNNNNNNNN" is removed from index 1.
    """
    if not is_dual_index(sample.index):
        return sample.index.replace("NNNNNNNNN", ""), sample.index2
    first_index, second_index = sample.index.split("-")
    return first_index.strip().replace("NNNNNNNNN", ""), second_index.strip()


def is_padding_needed(index_cycles: int, sample_index_length: int) -> bool:
def is_padding_needed(index1_cycles: int, index2_cycles: int, sample_index_length: int) -> bool:
"""Returns whether a sample needs padding or not given the sample index length.
A sample needs padding if its adapted index length is shorter than the number of index cycles
reads stated in the run parameters file of the sequencing. This happens when the sample index
is 8 nucleotides long and the number of index cycles read is 10 nucleotides.
A sample from a NovaSeq6000 flow cell needs padding if its adapted index lengths are shorter
than the number of index cycles reads stated in the run parameters file for both indexes.
This happens when the sample index is 8 nucleotides long and the number of index cycles read is
10 nucleotides long.
"""
index_cycles: int | None = index1_cycles if index1_cycles == index2_cycles else None
return index_cycles == LONG_INDEX_CYCLE_NR and sample_index_length == SHORT_SAMPLE_INDEX_LENGTH


Expand Down Expand Up @@ -95,81 +84,3 @@ def get_hamming_distance_for_indexes(sequence_1: str, sequence_2: str) -> int:
return get_hamming_distance(
str_1=sequence_1[:shortest_index_length], str_2=sequence_2[:shortest_index_length]
)


def update_barcode_mismatch_values_for_sample(
    sample_to_update: FlowCellSampleBCLConvert,
    samples_to_compare_to: list[FlowCellSampleBCLConvert],
) -> None:
    """Set the sample's barcode mismatch values to zero where its indexes are too similar.

    The sample is compared against every sample in samples_to_compare_to that has a different
    sample id. For each of index 1 and index 2 independently, if the hamming distance to the
    other sample's corresponding index is below MINIMUM_HAMMING_DISTANCE (documented as
    3 nucleotides; confirm against the constant), the matching barcode_mismatches_1 or
    barcode_mismatches_2 attribute of sample_to_update is set to 0. The sample is modified
    in place and nothing is returned.
    """
    own_index_1, own_index_2 = get_index_pair(sample=sample_to_update)
    for other_sample in samples_to_compare_to:
        # A sample is never compared against itself.
        if sample_to_update.sample_id == other_sample.sample_id:
            continue
        other_index_1, other_index_2 = get_index_pair(sample=other_sample)

        distance_index_1: int = get_hamming_distance_for_indexes(
            sequence_1=own_index_1, sequence_2=other_index_1
        )
        if distance_index_1 < MINIMUM_HAMMING_DISTANCE:
            LOG.debug(
                f"Turning barcode mismatch for index 1 to 0 for sample {sample_to_update.sample_id}"
            )
            sample_to_update.barcode_mismatches_1 = 0

        distance_index_2: int = get_hamming_distance_for_indexes(
            sequence_1=own_index_2, sequence_2=other_index_2
        )
        if distance_index_2 < MINIMUM_HAMMING_DISTANCE:
            LOG.debug(
                f"Turning barcode mismatch for index 2 to 0 for sample {sample_to_update.sample_id}"
            )
            sample_to_update.barcode_mismatches_2 = 0


def pad_and_reverse_complement_sample_indexes(
    sample: FlowCellSample, index_cycles: int, perform_reverse_complement: bool
) -> None:
    """Adapt the indexes of a sample in place.

    1. Pads both indexes when the sample is a FlowCellSampleBcl2Fastq and is_padding_needed
       reports that padding is required for the given index cycles and index 1 length.
    2. Takes the reverse complement of index 2 when perform_reverse_complement is set, i.e.
       in case of the new NovaSeq software control version (1.7) in combination with the new
       reagent kit (version 1.5).
    3. Assigns the resulting indexes to the sample attributes index and index2.
    """
    index1, index2 = get_index_pair(sample=sample)
    if isinstance(sample, FlowCellSampleBcl2Fastq) and is_padding_needed(
        index_cycles=index_cycles, sample_index_length=len(index1)
    ):
        LOG.debug("Padding indexes")
        index1 = pad_index_one(index_string=index1)
        index2 = pad_index_two(index_string=index2, reverse_complement=perform_reverse_complement)
    else:
        # Only report that padding was skipped when it really was skipped; previously this
        # message was also emitted right after the indexes had been padded.
        LOG.debug(f"Padding not necessary for sample {sample.sample_id}")
    if perform_reverse_complement:
        index2 = get_reverse_complement_dna_seq(index2)
    sample.index = index1
    sample.index2 = index2


def update_indexes_for_samples(
    samples: list[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq],
    index_cycles: int,
    perform_reverse_complement: bool,
    sequencer: str,
) -> None:
    """Update the index and index2 fields of every sample in place.

    For a NovaSeq sequencer the work is delegated to pad_and_reverse_complement_sample_indexes,
    which receives index_cycles and perform_reverse_complement. For any other sequencer the
    pair returned by get_index_pair is assigned to the sample unchanged.
    """
    for sample in samples:
        if sequencer == Sequencers.NOVASEQ:
            pad_and_reverse_complement_sample_indexes(
                sample=sample,
                index_cycles=index_cycles,
                perform_reverse_complement=perform_reverse_complement,
            )
            continue
        # Non-NovaSeq flow cells: only split the raw index, no padding or reverse complement.
        sample.index, sample.index2 = get_index_pair(sample=sample)
96 changes: 0 additions & 96 deletions cg/apps/demultiplex/sample_sheet/models.py

This file was deleted.

9 changes: 3 additions & 6 deletions cg/apps/demultiplex/sample_sheet/read_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@

from pydantic import TypeAdapter

from cg.apps.demultiplex.sample_sheet.models import (
from cg.apps.demultiplex.sample_sheet.sample_models import (
FlowCellSample,
FlowCellSampleBcl2Fastq,
FlowCellSampleBCLConvert,
SampleSheet,
)
from cg.apps.demultiplex.sample_sheet.sample_sheet_models import SampleSheet
from cg.constants.constants import FileFormat
from cg.constants.demultiplexing import (
SampleSheetBcl2FastqSections,
SampleSheetBCLConvertSections,
)
from cg.constants.demultiplexing import SampleSheetBcl2FastqSections, SampleSheetBCLConvertSections
from cg.exc import SampleSheetError
from cg.io.controller import ReadFile

Expand Down
Loading

0 comments on commit 3f9afe1

Please sign in to comment.