diff --git a/cg/apps/demultiplex/sample_sheet/api.py b/cg/apps/demultiplex/sample_sheet/api.py new file mode 100644 index 0000000000..7527646e50 --- /dev/null +++ b/cg/apps/demultiplex/sample_sheet/api.py @@ -0,0 +1,141 @@ +import logging +import os +from pathlib import Path + +from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.constants.constants import FileFormat +from cg.constants.demultiplexing import BclConverter +from cg.exc import FlowCellError, HousekeeperFileMissingError, SampleSheetError +from cg.io.controller import ReadFile, WriteFile +from cg.models.cg_config import CGConfig +from cg.models.flow_cell.flow_cell import FlowCellDirectoryData + +LOG = logging.getLogger(__name__) + + +class SampleSheetAPI: + """Sample Sheet API class.""" + + def __init__(self, config: CGConfig) -> None: + self.config: CGConfig = config + self.flow_cell_runs_dir = Path(config.illumina_flow_cells_directory) + self.hk_api: HousekeeperAPI = config.housekeeper_api + self.dry_run: bool = False + self.force: bool = False + + def set_dry_run(self, dry_run: bool) -> None: + """Set dry run.""" + LOG.debug(f"Set dry run to {dry_run}") + self.dry_run = dry_run + + def set_force(self, force: bool) -> None: + """Set force.""" + LOG.debug(f"Set force to {force}") + self.force = force + + def get_flow_cell(self, flow_cell_name: str, bcl_converter: str) -> FlowCellDirectoryData: + """ + Return a flow cell given a path and the bcl converter. + Raises: + SampleSheetError: If the flow cell directory or the data it contains is not valid. 
+ """ + flow_cell_path: Path = Path(self.flow_cell_runs_dir, flow_cell_name) + if not flow_cell_path.exists(): + LOG.warning(f"Could not find flow cell {flow_cell_path}") + raise SampleSheetError(f"Could not find flow cell {flow_cell_path}") + try: + flow_cell = FlowCellDirectoryData( + flow_cell_path=flow_cell_path, bcl_converter=bcl_converter + ) + except FlowCellError as error: + raise SampleSheetError from error + return flow_cell + + def get_valid_sample_sheet_path(self, sample_sheet_path: Path) -> Path | None: + """Return the sample sheet path if it exists and if it passes validation.""" + if sample_sheet_path and sample_sheet_path.exists(): + try: + self.validate_from_path(sample_sheet_path) + except SampleSheetError: + LOG.warning(f"Sample sheet {sample_sheet_path} was not valid") + return + return sample_sheet_path + + def get_valid_sample_sheet_path_from_hk(self, flow_cell_id: str) -> Path | None: + """Return the sample sheet path from Housekeeper if is valid and exists.""" + try: + sample_sheet_path: Path | None = self.hk_api.get_sample_sheet_path(flow_cell_id) + except HousekeeperFileMissingError: + LOG.warning(f"Sample sheet for flow cell {flow_cell_id} not found in Housekeeper") + return + return self.get_valid_sample_sheet_path(sample_sheet_path) + + def get_sample_sheet_content(self, flow_cell: FlowCellDirectoryData) -> list[list[str]]: + """Return the sample sheet content for a flow cell.""" + pass + + def validate_from_content(self, content: list[list[str]]) -> None: + """ + Validate a sample sheet given its content. + Raises: + SampleSheetError: If the sample sheet is not valid. + """ + pass + + def validate_from_path(self, path: Path) -> None: + """ + Validate a sample sheet given the path to the file. + Raises: + SampleSheetError: If the sample sheet is not valid. 
+ """ + content: list[list[str]] = ReadFile.get_content_from_file( + file_format=FileFormat.CSV, file_path=path + ) + self.validate_from_content(content) + + def create_sample_sheet(self, flow_cell: FlowCellDirectoryData) -> None: + """Create a valid sample sheet in the flow cell directory and add it to Housekeeper.""" + sample_sheet_content: list[list[str]] = self.get_sample_sheet_content(flow_cell) + if not self.force: + self.validate_from_content(sample_sheet_content) + WriteFile.write_file_from_content( + content=sample_sheet_content, + file_format=FileFormat.CSV, + file_path=flow_cell.sample_sheet_path, + ) + # TODO: REPLACE FOR ADDING AND INCLUDING TO HK + + def get_or_create_sample_sheet(self, flow_cell_name: str, bcl_converter: str) -> None: + """ + Ensure that a valid sample sheet is present in the flow cell directory. + If a valid sample sheet for the flow cell is present in Housekeeper, the function hard-links + it to the flow cell directory. If not and if the flow cell directory has a valid sample + sheet, the function adds and includes it to Housekeeper. If neither is present, the + function creates a sample sheet for the flow cell and adds and includes it to Housekeeper. + """ + flow_cell: FlowCellDirectoryData = self.get_flow_cell( + flow_cell_name=flow_cell_name, bcl_converter=bcl_converter + ) + if hk_sample_sheet_path := self.get_valid_sample_sheet_path_from_hk(flow_cell.id): + LOG.debug( + "Sample sheet already exists in Housekeeper. 
Hard-linking it to flow cell directory" + ) + if not self.dry_run: + os.link(src=hk_sample_sheet_path, dst=flow_cell.sample_sheet_path) + return + elif self.get_valid_sample_sheet_path(flow_cell.sample_sheet_path): + LOG.info("Sample sheet already exists in flow cell directory") + if not self.dry_run: + # TODO: REPLACE FOR ADDING AND INCLUDING TO HK + return + else: + self.create_sample_sheet(flow_cell) + + def create_all_sample_sheets(self): + """Create sample sheets for all flow cells.""" + for flow_cell_dir in self.flow_cell_runs_dir.iterdir(): + try: + self.get_or_create_sample_sheet(flow_cell_dir.name, bcl_converter=BclConverter.BCL2FASTQ) + except SampleSheetError as error: + LOG.error(f"Could not create sample sheet for {flow_cell_dir.name}: {error}") + continue diff --git a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py index d00abc0082..72e69a4c5c 100644 --- a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py +++ b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py @@ -5,14 +5,12 @@ from typing import Type from cg.apps.demultiplex.sample_sheet.index import Index, get_valid_indexes, is_dual_index -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( - get_samples_by_lane, - get_validated_sample_sheet, -) +from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_samples_by_lane from cg.apps.demultiplex.sample_sheet.sample_models import ( FlowCellSampleBcl2Fastq, FlowCellSampleBCLConvert, ) +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator from cg.constants.demultiplexing import ( BclConverter, IndexSettings, @@ -39,9 +37,9 @@ def __init__( self.flow_cell_id: str = flow_cell.id self.lims_samples: list[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq] = lims_samples self.run_parameters: RunParameters = flow_cell.run_parameters - self.sample_type: Type[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq] = ( - 
flow_cell.sample_type - ) + self.sample_type: Type[ + FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq + ] = flow_cell.sample_type self.force: bool = force self.index_settings: IndexSettings = self.run_parameters.index_settings @@ -124,10 +122,7 @@ def construct_sample_sheet(self) -> list[list[str]]: LOG.info("Skipping validation of sample sheet due to force flag") return sample_sheet_content LOG.info("Validating sample sheet") - get_validated_sample_sheet( - sample_sheet_content=sample_sheet_content, - sample_type=self.sample_type, - ) + SampleSheetValidator(content=sample_sheet_content).validate_sample_sheet() LOG.info("Sample sheet passed validation") return sample_sheet_content diff --git a/cg/apps/demultiplex/sample_sheet/sample_sheet_validator.py b/cg/apps/demultiplex/sample_sheet/sample_sheet_validator.py new file mode 100644 index 0000000000..ac4595b639 --- /dev/null +++ b/cg/apps/demultiplex/sample_sheet/sample_sheet_validator.py @@ -0,0 +1,313 @@ +"""Module with validator classes for the sample sheet.""" + +import logging +import re +from pathlib import Path +from typing import Type + +from pydantic import TypeAdapter + +from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( + get_raw_samples, + validate_samples_unique_per_lane, +) +from cg.apps.demultiplex.sample_sheet.sample_models import ( + FlowCellSample, + FlowCellSampleBcl2Fastq, + FlowCellSampleBCLConvert, +) +from cg.constants.constants import FileFormat +from cg.constants.demultiplexing import ( + NAME_TO_INDEX_SETTINGS, + SampleSheetBcl2FastqSections, + SampleSheetBCLConvertSections, +) +from cg.exc import OverrideCyclesError, SampleSheetError +from cg.io.controller import ReadFile + +LOG = logging.getLogger(__name__) + +FORWARD_INDEX_CYCLE_PATTERN: str = r"I(\d+)N(\d+)" +REVERSE_INDEX_CYCLE_PATTERN: str = r"N(\d+)I(\d+)" + + +class SampleSheetValidator: + """Class for validating the content of a sample sheet.""" + + def __init__(self, path: Path | None = None, content: 
list[list[str]] | None = None): + """Instantiate the class with a sample sheet file path or sample sheet content.""" + if content: + self.content: list[list[str]] = content + elif path and path.exists(): + self.content: list[list[str]] = ReadFile.get_content_from_file( + file_format=FileFormat.CSV, file_path=path + ) + else: + raise SampleSheetError(f"Provide a valid content or an existing sample sheet file.") + self.sample_type: Type[FlowCellSample] = self._get_sample_type() + self.read1_cycles: int | None = None + self.read2_cycles: int | None = None + self.index1_cycles: int | None = None + self.index2_cycles: int | None = None + self.is_index2_reverse_complement: bool | None = None + + def _get_sample_type(self) -> Type[FlowCellSample]: + """Return the sample type identified from the sample sheet content.""" + for row in self.content: + if not row: + continue + if SampleSheetBCLConvertSections.Data.HEADER in row[0]: + LOG.info("Sample sheet was generated for BCL Convert") + return FlowCellSampleBCLConvert + if SampleSheetBcl2FastqSections.Data.HEADER in row[0]: + LOG.info("Sample sheet was generated for BCL2FASTQ") + return FlowCellSampleBcl2Fastq + message: str = "Could not determine sample sheet type" + LOG.error(message) + raise SampleSheetError(message) + + def validate_all_sections_present(self) -> None: + """ + Returns whether the sample sheet has the four mandatory sections: + - Header + - Reads + - BCLConvert Settings + - BCLConvert Data + Raises: SampleSheetError if the sample sheet does not have all the sections. 
+ """ + has_header: bool = [SampleSheetBCLConvertSections.Header.HEADER] in self.content + has_cycles: bool = [SampleSheetBCLConvertSections.Reads.HEADER] in self.content + has_settings: bool = [SampleSheetBCLConvertSections.Settings.HEADER] in self.content + has_data: bool = [SampleSheetBCLConvertSections.Data.HEADER] in self.content + if not all([has_header, has_cycles, has_settings, has_data]): + message: str = "Sample sheet does not have all the necessary sections" + LOG.error(message) + raise SampleSheetError(message) + + def _get_index_settings_name(self) -> str: + """Return the index settings from the sample sheet's header.""" + for row in self.content: + if SampleSheetBCLConvertSections.Header.INDEX_SETTINGS in row: + return row[1] + message: str = "No index settings found in sample sheet" + LOG.error(message) + raise SampleSheetError(message) + + def set_is_index2_reverse_complement(self) -> None: + """Return whether the index2 override cycles value is reverse-complemented.""" + settings_name: str = self._get_index_settings_name() + self.is_index2_reverse_complement = NAME_TO_INDEX_SETTINGS[ + settings_name + ].are_i5_override_cycles_reverse_complemented + + def _get_cycle(self, cycle_name: str, nullable: bool = False) -> int | None: + """ + Return the cycle from the sample sheet given the cycle name. Set nullable to True to + return None if the cycle is not found. + Raises: + SampleSheetError if the cycle is not found and nullable is False. 
+ """ + for row in self.content: + if cycle_name in row: + return int(row[1]) + if not nullable: + message: str = f"No {cycle_name} found in sample sheet" + LOG.error(message) + raise SampleSheetError(message) + + def set_cycles(self): + """Set values to the run cycle attributes.""" + self.read1_cycles = self._get_cycle(SampleSheetBCLConvertSections.Reads.READ_CYCLES_1) + self.read2_cycles = self._get_cycle(SampleSheetBCLConvertSections.Reads.READ_CYCLES_2) + self.index1_cycles = self._get_cycle(SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_1) + self.index2_cycles = self._get_cycle( + cycle_name=SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, nullable=True + ) + + def validate_samples(self) -> None: + """ + Determine if the samples have the correct attributes and are not unique per lane. + Raises: + ValidationError if the samples do not have the correct attributes based on their model. + SampleSheetError if the samples are not unique per lane. + """ + raw_samples: list[dict[str, str]] = get_raw_samples(self.content) + validated_samples = TypeAdapter(list[self.sample_type]).validate_python(raw_samples) + validate_samples_unique_per_lane(validated_samples) + + def validate_override_cycles(self) -> None: + """Determine if the samples' override cycles are valid. + Raises: + SampleSheetError if any of the samples' override cycles are not valid. 
+ """ + samples: list[dict[str, str]] = get_raw_samples(self.content) + validator = OverrideCyclesValidator( + run_read1_cycles=self.read1_cycles, + run_read2_cycles=self.read2_cycles, + run_index1_cycles=self.index1_cycles, + run_index2_cycles=self.index2_cycles, + is_reverse_complement=self.is_index2_reverse_complement, + ) + for sample in samples: + try: + validator.validate_sample(sample) + except OverrideCyclesError as error: + raise SampleSheetError from error + + def validate_bcl_convert(self): + """Determine if the BCLConvert sample sheet is valid, which means: + - All sections are present + - The index settings are specified in the sample sheet header + - The read and index cycles are specified in the sample sheet's reads section + - The samples have the correct attributes + - The override cycles are valid + """ + self.validate_all_sections_present() + self.set_is_index2_reverse_complement() + self.set_cycles() + self.validate_samples() + self.validate_override_cycles() + + def validate_sample_sheet(self): + """Call the proper validation depending of the sample sheet type.""" + if self.sample_type is FlowCellSampleBCLConvert: + self.validate_bcl_convert() + else: + self.validate_samples() + + +class OverrideCyclesValidator: + """Class for validating the override cycles value of a sample in a sample sheet.""" + + def __init__( + self, + run_read1_cycles: int, + run_read2_cycles: int, + run_index1_cycles: int, + run_index2_cycles: int | None, + is_reverse_complement: bool, + ): + self.sample: dict[str, str] | None = None + self.sample_cycles: list[str] | None = None + self.sample_id: str | None = None + self.run_read1_cycles: int = run_read1_cycles + self.run_read2_cycles: int = run_read2_cycles + self.run_index1_cycles: int = run_index1_cycles + self.run_index2_cycles: int | None = run_index2_cycles + self.is_reverse_complement: bool = is_reverse_complement + + @staticmethod + def is_index_cycle_value_following_pattern( + pattern: str, index_cycle: str, 
run_cycles: int, index_sequence: str + ) -> bool: + """ + Returns whether an index cycle string is following a valid cycle regex pattern and has + consistent values. Valid patterns are 'I(\d+)N(\d+)' and 'N(\d+)I(\d+)'. Having consistent + values means that the sum of the number of index characters (I) and the number of ignored + characters (N) specified in the index cycle string is equal to the number of run cycles + and the length of the index sequence is equal to the number of index characters (I). + """ + match = re.match(pattern, index_cycle) + if match: + if pattern == FORWARD_INDEX_CYCLE_PATTERN: + index_chars, ignored_chars = map(int, match.groups()) + elif pattern == REVERSE_INDEX_CYCLE_PATTERN: + ignored_chars, index_chars = map(int, match.groups()) + else: + LOG.warning(f"Pattern {pattern} is not a valid index cycle pattern") + return False + if index_chars + ignored_chars == run_cycles and len(index_sequence) == index_chars: + return True + return False + + def _validate_reads_cycles(self) -> None: + """ + Determine if the sample read cycles are valid, i.e. if the sample read cycle values are + equal to the respective run read cycles. + Raises: + OverrideCyclesError if the reads cycles are not valid. + """ + read1_cycle: str = self.sample_cycles[0] + read2_cycle: str = self.sample_cycles[-1] + if ( + read1_cycle == f"Y{self.run_read1_cycles}" + and read2_cycle == f"Y{self.run_read2_cycles}" + ): + return + message: str = f"Incorrect read cycles {self.sample_cycles} for sample {self.sample_id}" + LOG.error(message) + raise OverrideCyclesError(message) + + def _validate_index1_cycles(self) -> None: + """ + Determine if the sample index 1 cycle is valid, i.e., if the number of index characters in + the override cycles coincides with the length of the index sequence and if the number of + ignored characters in the override cycles matches the difference between the length of the + index sequence and the number of run index1 cycles. 
+ Raises: + OverrideCyclesError if the index 1 cycle is not valid. + """ + index1_cycle: str = self.sample_cycles[1] + if ( + self.run_index1_cycles == len(self.sample["Index"]) + and index1_cycle == f"I{self.run_index1_cycles}" + ): + return + if self.is_index_cycle_value_following_pattern( + pattern=FORWARD_INDEX_CYCLE_PATTERN, + index_cycle=index1_cycle, + run_cycles=self.run_index1_cycles, + index_sequence=self.sample["Index"], + ): + return + message: str = f"Incorrect index1 cycle {index1_cycle} for sample {self.sample_id}" + LOG.error(message) + raise OverrideCyclesError(message) + + def _validate_index2_cycles(self) -> None: + """ + Determine if the index 2 cycle is valid, i.e., if the number of ignored and index characters + correspond to the length of the sample index2 sequence and the number of run index2 cycles, + or if the index cycles should be None. + Raises: + OverrideCyclesError if the index 2 cycle is not valid. + """ + if not self.run_index2_cycles and len(self.sample_cycles) == 3: + return + index2_cycle: str = self.sample_cycles[2] + if not self.sample["Index2"] and index2_cycle == f"N{self.run_index2_cycles}": + return + if ( + self.run_index2_cycles == len(self.sample["Index2"]) + and index2_cycle == f"I{self.run_index2_cycles}" + ): + return + if self.is_reverse_complement and self.is_index_cycle_value_following_pattern( + pattern=REVERSE_INDEX_CYCLE_PATTERN, + index_cycle=index2_cycle, + run_cycles=self.run_index2_cycles, + index_sequence=self.sample["Index2"], + ): + return + if not self.is_reverse_complement and self.is_index_cycle_value_following_pattern( + pattern=FORWARD_INDEX_CYCLE_PATTERN, + index_cycle=index2_cycle, + run_cycles=self.run_index2_cycles, + index_sequence=self.sample["Index2"], + ): + return + message: str = f"Incorrect index2 cycle {index2_cycle} for sample {self.sample_id}" + LOG.error(message) + raise OverrideCyclesError(message) + + def validate_sample( + self, + sample: dict[str, str], + ) -> None: + """Determine 
if the override cycles are valid for a given sample.""" + self.sample = sample + self.sample_cycles = sample["OverrideCycles"].split(";") + self.sample_id = sample["Sample_ID"] + self._validate_reads_cycles() + self._validate_index1_cycles() + self._validate_index2_cycles() diff --git a/cg/cli/demultiplex/sample_sheet.py b/cg/cli/demultiplex/sample_sheet.py index 29f32b9cd8..25720e504a 100644 --- a/cg/cli/demultiplex/sample_sheet.py +++ b/cg/cli/demultiplex/sample_sheet.py @@ -6,19 +6,15 @@ from pydantic import ValidationError from cg.apps.demultiplex.sample_sheet.create import create_sample_sheet -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( - get_sample_sheet_from_file, -) from cg.apps.demultiplex.sample_sheet.sample_models import FlowCellSample +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims.sample_sheet import get_flow_cell_samples from cg.constants.constants import DRY_RUN, FileFormat from cg.constants.demultiplexing import OPTION_BCL_CONVERTER from cg.exc import FlowCellError, HousekeeperFileMissingError, SampleSheetError from cg.io.controller import WriteFile, WriteStream -from cg.meta.demultiplex.housekeeper_storage_functions import ( - add_sample_sheet_path_to_housekeeper, -) +from cg.meta.demultiplex.housekeeper_storage_functions import add_sample_sheet_path_to_housekeeper from cg.models.cg_config import CGConfig from cg.models.flow_cell.flow_cell import FlowCellDirectoryData @@ -36,9 +32,10 @@ def validate_sample_sheet(sheet: click.Path): """Validate a sample sheet.""" LOG.info(f"Validating {sheet} sample sheet") try: - get_sample_sheet_from_file(Path(sheet)) - except ValidationError as error: - LOG.warning(error) + validator = SampleSheetValidator(Path(sheet)) + validator.validate_sample_sheet() + except (ValidationError, SampleSheetError) as error: + LOG.error(error) raise click.Abort from error LOG.info("Sample sheet passed 
validation") diff --git a/cg/constants/demultiplexing.py b/cg/constants/demultiplexing.py index b5c1ad4e94..19135b9caf 100644 --- a/cg/constants/demultiplexing.py +++ b/cg/constants/demultiplexing.py @@ -272,3 +272,9 @@ class IndexSettings(BaseModel): should_i5_be_reverse_complemented=False, are_i5_override_cycles_reverse_complemented=False, ) + +NAME_TO_INDEX_SETTINGS: dict[str, IndexSettings] = { + "NovaSeqX": NOVASEQ_X_INDEX_SETTINGS, + "NovaSeq6000Post1.5Kits": NOVASEQ_6000_POST_1_5_KITS_INDEX_SETTINGS, + "NoReverseComplements": NO_REVERSE_COMPLEMENTS_INDEX_SETTINGS, +} diff --git a/cg/exc.py b/cg/exc.py index 023e514691..65edb63689 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -260,3 +260,7 @@ class XMLError(CgError): class OrderNotFoundError(CgError): """Exception raised when an order is not found.""" + + +class OverrideCyclesError(CgError): + """Exception raised when the override cycles are not correct.""" diff --git a/cg/meta/demultiplex/utils.py b/cg/meta/demultiplex/utils.py index bb9cf23932..d81d6f2d03 100644 --- a/cg/meta/demultiplex/utils.py +++ b/cg/meta/demultiplex/utils.py @@ -3,13 +3,10 @@ import re from pathlib import Path -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_sample_sheet_from_file -from cg.apps.demultiplex.sample_sheet.sample_sheet_models import SampleSheet from cg.constants.constants import FileExtensions from cg.constants.demultiplexing import DemultiplexingDirsAndFiles from cg.constants.sequencing import FLOWCELL_Q30_THRESHOLD, Sequencers from cg.io.csv import read_csv, write_csv -from cg.models.flow_cell.flow_cell import FlowCellDirectoryData from cg.utils.files import ( get_file_in_directory, get_files_matching_pattern, @@ -175,13 +172,6 @@ def rename_fastq_file_if_needed(fastq_file_path: Path, flow_cell_name: str) -> P return renamed_fastq_file_path -def get_sample_sheet(flow_cell: FlowCellDirectoryData) -> SampleSheet: - """Return sample sheet associated with flowcell.""" - sample_sheet_path: Path = 
flow_cell.get_sample_sheet_path_hk() - sample_sheet: SampleSheet = get_sample_sheet_from_file(sample_sheet_path) - return sample_sheet - - def get_undetermined_fastqs(lane: int, flow_cell_path: Path) -> list[Path]: """Get the undetermined fastq files for a specific lane on a flow cell.""" undetermined_pattern = f"Undetermined*_L00{lane}_*{FileExtensions.FASTQ}{FileExtensions.GZIP}" diff --git a/cg/models/flow_cell/flow_cell.py b/cg/models/flow_cell/flow_cell.py index 3c67003bed..429fa3e244 100644 --- a/cg/models/flow_cell/flow_cell.py +++ b/cg/models/flow_cell/flow_cell.py @@ -9,14 +9,13 @@ from pydantic import ValidationError from typing_extensions import Literal -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( - get_sample_sheet_from_file, -) +from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_sample_sheet_from_file from cg.apps.demultiplex.sample_sheet.sample_models import ( FlowCellSampleBcl2Fastq, FlowCellSampleBCLConvert, ) from cg.apps.demultiplex.sample_sheet.sample_sheet_models import SampleSheet +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator from cg.cli.demultiplex.copy_novaseqx_demultiplex_data import get_latest_analysis_path from cg.constants.bcl_convert_metrics import SAMPLE_SHEET_HEADER from cg.constants.constants import LENGTH_LONG_DATE @@ -217,7 +216,8 @@ def sample_sheet_exists(self) -> bool: def validate_sample_sheet(self) -> bool: """Validate if sample sheet is on correct format.""" try: - get_sample_sheet_from_file(self.sample_sheet_path) + validator = SampleSheetValidator(self.sample_sheet_path) + validator.validate_sample_sheet() except (SampleSheetError, ValidationError) as error: LOG.warning("Invalid sample sheet") LOG.warning(error) @@ -235,10 +235,6 @@ def sample_sheet(self) -> SampleSheet: raise FlowCellError("Sample sheet path has not been assigned yet") return get_sample_sheet_from_file(self._sample_sheet_path_hk) - def get_sample_sheet(self) -> SampleSheet: - 
"""Return sample sheet object.""" - return get_sample_sheet_from_file(self.sample_sheet_path) - def is_sequencing_done(self) -> bool: """Check if sequencing is done. This is indicated by that the file RTAComplete.txt exists. diff --git a/tests/apps/demultiplex/conftest.py b/tests/apps/demultiplex/conftest.py index f49ff38bb6..492f8f35d1 100644 --- a/tests/apps/demultiplex/conftest.py +++ b/tests/apps/demultiplex/conftest.py @@ -6,9 +6,7 @@ FlowCellSampleBcl2Fastq, FlowCellSampleBCLConvert, ) -from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import ( - SampleSheetCreatorBcl2Fastq, -) +from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreatorBcl2Fastq from cg.constants.demultiplexing import SampleSheetBcl2FastqSections from cg.models.flow_cell.flow_cell import FlowCellDirectoryData @@ -316,18 +314,12 @@ def bcl_convert_flow_cell_sample(raw_index_sequence: str) -> FlowCellSampleBCLCo @pytest.fixture -def bcl_convert_sample_sheet_path(illumina_demultiplexed_runs_directory): - return Path( - illumina_demultiplexed_runs_directory, - "230504_A00689_0804_BHY7FFDRX2", - "SampleSheet.csv", - ) +def sample_sheet_content_only_headers() -> list[list[str]]: + """Return a sample sheet content with only headers.""" + return [["[Header]"], ["[Reads]"], ["[BCLConvert_Settings]"], ["[BCLConvert_Data]"]] @pytest.fixture -def bcl2fastq_sample_sheet_path(illumina_demultiplexed_runs_directory): - return Path( - illumina_demultiplexed_runs_directory, - "170407_ST-E00198_0209_BHHKVCALXX", - "SampleSheet.csv", - ) +def sample_sheet_content_missing_data_header() -> list[list[str]]: + """Return a sample sheet content with only headers.""" + return [["[Header]"], ["[Reads]"], ["[BCLConvert_Settings]"]] diff --git a/tests/apps/demultiplex/test_override_cycles_validator.py b/tests/apps/demultiplex/test_override_cycles_validator.py new file mode 100644 index 0000000000..7b8a06b057 --- /dev/null +++ b/tests/apps/demultiplex/test_override_cycles_validator.py @@ 
-0,0 +1,44 @@ +import pytest +from _pytest.fixtures import FixtureRequest + +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import ( + FORWARD_INDEX_CYCLE_PATTERN, + REVERSE_INDEX_CYCLE_PATTERN, + OverrideCyclesValidator, +) + + +@pytest.mark.skip(reason="Test is not implemented") +@pytest.mark.parametrize( + "pattern, index_cycle, run_cycles, expected", + [(FORWARD_INDEX_CYCLE_PATTERN, "I8N2", 10, True)], +) +def test_is_index_cycle_value_following_pattern( + pattern: str, + index_cycle: str, + run_cycles: int, + expected: bool, + index1_sequence_from_lims: str, + request: FixtureRequest, +): + """Test that index cycles are recognised following a pattern.""" + # GIVEN an override cycles validator, a pattern and a sample + reverse_complement: bool = pattern == REVERSE_INDEX_CYCLE_PATTERN + validator = OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=run_cycles, + run_index2_cycles=None, + is_reverse_complement=reverse_complement, + ) + + # WHEN checking if the index cycle value is following the pattern + result: bool = validator.is_index_cycle_value_following_pattern( + pattern=pattern, + index_cycle=index_cycle, + run_cycles=validator.run_index1_cycles, + index_sequence=index1_sequence_from_lims, + ) + + # THEN assert that the index cycles are following the pattern + assert result == expected diff --git a/tests/apps/demultiplex/test_read_sample_sheet.py b/tests/apps/demultiplex/test_read_sample_sheet.py index 900ea38001..7197de3d81 100644 --- a/tests/apps/demultiplex/test_read_sample_sheet.py +++ b/tests/apps/demultiplex/test_read_sample_sheet.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from typing import Type import pytest @@ -180,11 +181,13 @@ def test_get_sample_sheet_dragen_duplicate_different_lanes( assert sample_sheet.samples -def test_get_sample_internal_ids_from_sample_sheet(novaseq6000_bcl_convert_sample_sheet_path: Path): +def test_get_sample_internal_ids_from_sample_sheet( + 
novaseq_6000_post_1_5_kits_correct_sample_sheet_path: Path, +): """Test that getting sample internal ids from a sample sheet returns a unique list of strings.""" # GIVEN a sample sheet with only valid samples sample_sheet: SampleSheet = get_sample_sheet_from_file( - novaseq6000_bcl_convert_sample_sheet_path + novaseq_6000_post_1_5_kits_correct_sample_sheet_path ) # WHEN getting the valid sample internal ids @@ -201,21 +204,27 @@ def test_get_sample_internal_ids_from_sample_sheet(novaseq6000_bcl_convert_sampl assert is_valid_sample_internal_id(sample_internal_id=sample_internal_id) is True -def test_get_sample_type_for_bcl_convert(bcl_convert_sample_sheet_path: Path): +def test_get_sample_type_for_bcl_convert( + novaseq_6000_post_1_5_kits_correct_sample_sheet_path: Path, +): # GIVEN a bcl convert sample sheet path # WHEN getting the sample type - sample_type: FlowCellSample = get_sample_type(bcl_convert_sample_sheet_path) + sample_type: Type[FlowCellSample] = get_sample_type( + novaseq_6000_post_1_5_kits_correct_sample_sheet_path + ) # THEN the sample type is FlowCellSampleBCLConvert assert sample_type is FlowCellSampleBCLConvert -def test_get_sample_type_for_bcl2fastq(bcl2fastq_sample_sheet_path: Path): +def test_get_sample_type_for_bcl2fastq(hiseq_2500_custom_index_bcl2fastq_sample_sheet: Path): # GIVEN a bcl convert sample sheet path # WHEN getting the sample type - sample_type: FlowCellSample = get_sample_type(bcl2fastq_sample_sheet_path) + sample_type: Type[FlowCellSample] = get_sample_type( + hiseq_2500_custom_index_bcl2fastq_sample_sheet + ) # THEN the sample type is FlowCellSampleBCLConvert assert sample_type is FlowCellSampleBcl2Fastq diff --git a/tests/apps/demultiplex/test_sample_sheet_validator.py b/tests/apps/demultiplex/test_sample_sheet_validator.py new file mode 100644 index 0000000000..606c2a2183 --- /dev/null +++ b/tests/apps/demultiplex/test_sample_sheet_validator.py @@ -0,0 +1,236 @@ +from typing import Type + +import pytest +from 
_pytest.fixtures import FixtureRequest + +from cg.apps.demultiplex.sample_sheet.sample_models import ( + FlowCellSample, + FlowCellSampleBcl2Fastq, + FlowCellSampleBCLConvert, +) +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator +from cg.constants.demultiplexing import SampleSheetBCLConvertSections +from cg.exc import SampleSheetError + + +@pytest.mark.parametrize( + "sample_sheet_validator, expected_sample_type", + [ + ("hiseq_x_single_index_sample_sheet_validator", FlowCellSampleBCLConvert), + ("hiseq_x_single_index_bcl2fastq_sample_sheet_validator", FlowCellSampleBcl2Fastq), + ], +) +def test_get_sample_type_correct_sample_sheet( + sample_sheet_validator: str, expected_sample_type: Type[FlowCellSample], request: FixtureRequest +): + """Test that the correct sample type is returned.""" + # GIVEN a sample sheet validator + validator: SampleSheetValidator = request.getfixturevalue(sample_sheet_validator) + + # WHEN getting the sample type + sample_type: Type = validator._get_sample_type() + + # THEN the correct sample type is returned + assert sample_type == expected_sample_type + + +def test_get_sample_type_incorrect_sample_sheet( + novaseq_x_sample_sheet_validator: SampleSheetValidator, caplog +): + """Test that the correct sample type is returned.""" + # GIVEN the content of an invalid sample sheet + novaseq_x_sample_sheet_validator.content = [["invalid", "content"], ["sample", "sheet"]] + + # WHEN getting the sample type + with pytest.raises(SampleSheetError): + # THEN a SampleSheetError is raised + novaseq_x_sample_sheet_validator._get_sample_type() + assert "Could not determine sample sheet type" in caplog.text + + +def test_validate_all_sections_present( + novaseq_x_sample_sheet_validator: SampleSheetValidator, + sample_sheet_content_only_headers: list[list[str]], +): + """Test that when all sections are present in the sample sheet the validation passes.""" + # GIVEN a sample sheet content with all the required 
sections and a sample sheet validator + novaseq_x_sample_sheet_validator.content = sample_sheet_content_only_headers + assert len(novaseq_x_sample_sheet_validator.content) == 4 + + # WHEN validating the sections of the sample sheet + novaseq_x_sample_sheet_validator.validate_all_sections_present() + + # THEN no error is raised + + +def test_validate_all_sections_present_missing_section( + novaseq_x_sample_sheet_validator: SampleSheetValidator, + sample_sheet_content_missing_data_header: list[list[str]], + caplog: pytest.LogCaptureFixture, +): + """Test that when one section is missing in the sample sheet the validation fails.""" + # GIVEN a sample sheet content with a missing section and a sample sheet validator + novaseq_x_sample_sheet_validator.content = sample_sheet_content_missing_data_header + assert len(novaseq_x_sample_sheet_validator.content) == 3 + + # WHEN validating the sections of the sample sheet + with pytest.raises(SampleSheetError): + # THEN a SampleSheetError is raised + novaseq_x_sample_sheet_validator.validate_all_sections_present() + assert "Sample sheet does not have all the necessary sections" in caplog.text + + +@pytest.mark.parametrize( + "sample_sheet_validator, expected_index_settings_name", + [ + ("novaseq_6000_pre_1_5_kits_sample_sheet_validator", "NoReverseComplements"), + ("novaseq_6000_post_1_5_kits_sample_sheet_validator", "NovaSeq6000Post1.5Kits"), + ("novaseq_x_sample_sheet_validator", "NovaSeqX"), + ], + ids=["NovaSeq6000Pre1.5Kits", "NovaSeq6000Post1.5Kits", "NovaSeqX"], +) +def test_get_index_settings_name( + sample_sheet_validator: str, expected_index_settings_name: str, request: FixtureRequest +): + """Test that the correct index settings name is returned.""" + # GIVEN a sample sheet validator + validator: SampleSheetValidator = request.getfixturevalue(sample_sheet_validator) + + # WHEN getting the index settings name + index_settings_name: str = validator._get_index_settings_name() + + # THEN the correct index settings name
is returned + assert index_settings_name == expected_index_settings_name + + +def test_get_index_settings_name_missing_index_settings( + novaseq_x_sample_sheet_validator: SampleSheetValidator, caplog +): + """Test that getting the index settings from a sample sheet without it fails.""" + # GIVEN the content of a sample sheet without index settings + novaseq_x_sample_sheet_validator.content.pop(5) + assert ( + not [SampleSheetBCLConvertSections.Header.INDEX_SETTINGS.value, "NovaSeqX"] + in novaseq_x_sample_sheet_validator.content + ) + + # WHEN getting the index settings name + with pytest.raises(SampleSheetError): + # THEN a SampleSheetError is raised + novaseq_x_sample_sheet_validator._get_index_settings_name() + assert "No index settings found in sample sheet" in caplog.text + + +@pytest.mark.parametrize( + "sample_sheet_validator, expected_reverse_complement", + [ + ("novaseq_6000_pre_1_5_kits_sample_sheet_validator", False), + ("novaseq_6000_post_1_5_kits_sample_sheet_validator", False), + ("novaseq_x_sample_sheet_validator", True), + ], + ids=["NovaSeq6000Pre1.5Kits", "NovaSeq6000Post1.5Kits", "NovaSeqX"], +) +def test_set_is_index2_reverse_complement( + sample_sheet_validator: str, expected_reverse_complement: bool, request: FixtureRequest +): + """Test that the correct value for index2 reverse complement value is set.""" + # GIVEN a sample sheet validator + validator: SampleSheetValidator = request.getfixturevalue(sample_sheet_validator) + + # WHEN setting the index2 reverse complement value + validator.set_is_index2_reverse_complement() + + # THEN the correct value is set + assert validator.is_index2_reverse_complement == expected_reverse_complement + + +@pytest.mark.parametrize( + "sample_sheet_content, nullable, expected", + [ + ([[SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, 10]], False, 10), + ([[SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, 10]], True, 10), + ([["not_a_cycle", 10]], True, None), + ], + ids=["index2_cycles", 
"index2_cycles_nullable", "not_a_cycle_nullable"], +) +def test_get_cycle( + sample_sheet_content: list[list[str]], + nullable: bool, + expected: int, + novaseq_x_sample_sheet_validator: SampleSheetValidator, +): + """Test that a cycle value is fetched when the content is correct.""" + # GIVEN a sample sheet validator with a modified sample sheet content + novaseq_x_sample_sheet_validator.content = sample_sheet_content + assert len(novaseq_x_sample_sheet_validator.content) == 1 + + # WHEN fetching the cycle value + result = novaseq_x_sample_sheet_validator._get_cycle( + cycle_name=SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, nullable=nullable + ) + + # THEN the correct value is returned + assert result == expected + + +def test_get_cycle_missing_cycle(novaseq_x_sample_sheet_validator: SampleSheetValidator, caplog): + """Test that fetching a missing cycle value when nullable is False fails.""" + # GIVEN a sample sheet validator with a modified sample sheet content + novaseq_x_sample_sheet_validator.content = [["not_a_cycle", 10]] + assert len(novaseq_x_sample_sheet_validator.content) == 1 + + # WHEN fetching the cycle value + with pytest.raises(SampleSheetError): + # THEN a SampleSheetError is raised + novaseq_x_sample_sheet_validator._get_cycle( + cycle_name=SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, nullable=False + ) + assert ( + f"No {SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2} found in sample sheet" + in caplog.text + ) + + +def test_set_cycles(novaseq_x_sample_sheet_validator: SampleSheetValidator, caplog): + """Test that the correct values for the cycles are set.""" + # GIVEN a sample sheet validator with a valid content and unset cycle values + assert novaseq_x_sample_sheet_validator.read1_cycles is None + assert novaseq_x_sample_sheet_validator.read2_cycles is None + assert novaseq_x_sample_sheet_validator.index1_cycles is None + assert novaseq_x_sample_sheet_validator.index2_cycles is None + + # WHEN setting the cycles + 
novaseq_x_sample_sheet_validator.set_cycles() + + # THEN the correct values are set + assert novaseq_x_sample_sheet_validator.read1_cycles == 151 + assert novaseq_x_sample_sheet_validator.read2_cycles == 151 + assert novaseq_x_sample_sheet_validator.index1_cycles == 10 + assert novaseq_x_sample_sheet_validator.index2_cycles == 10 + + +@pytest.mark.parametrize( + "sample_sheet_validator", + [ + "hiseq_x_single_index_sample_sheet_validator", + "hiseq_x_single_index_bcl2fastq_sample_sheet_validator", + "hiseq_x_dual_index_sample_sheet_validator", + "hiseq_x_dual_index_bcl2fastq_sample_sheet_validator", + "hiseq_2500_dual_index_sample_sheet_validator", + "hiseq_2500_dual_index_bcl2fastq_sample_sheet_validator", + "hiseq_2500_custom_index_sample_sheet_validator", + "hiseq_2500_custom_index_bcl2fastq_sample_sheet_validator", + "novaseq_6000_pre_1_5_kits_sample_sheet_validator", + "novaseq_6000_post_1_5_kits_sample_sheet_validator", + "novaseq_x_sample_sheet_validator", + ], +) +def test_validate_sample_sheet(sample_sheet_validator: str, request: FixtureRequest): + """Test that a correct sample sheet passes validation.""" + # GIVEN a correct sample sheet and a sample sheet validator + validator: SampleSheetValidator = request.getfixturevalue(sample_sheet_validator) + + # WHEN validating the sample sheet + validator.validate_sample_sheet() + + # THEN no error is raised diff --git a/tests/cli/demultiplex/test_validate_sample_sheet.py b/tests/cli/demultiplex/test_validate_sample_sheet.py index 660e466346..f1a2f78972 100644 --- a/tests/cli/demultiplex/test_validate_sample_sheet.py +++ b/tests/cli/demultiplex/test_validate_sample_sheet.py @@ -1,10 +1,10 @@ from pathlib import Path +import pytest +from _pytest.fixtures import FixtureRequest from click.testing import CliRunner, Result -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( - get_sample_sheet_from_file, -) +from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_sample_sheet_from_file from 
cg.cli.demultiplex.sample_sheet import validate_sample_sheet from cg.constants import EXIT_SUCCESS, FileExtensions @@ -53,14 +53,30 @@ def test_validate_sample_sheet_wrong_file_type( assert "seems to be in wrong format" in caplog.text +@pytest.mark.parametrize( + "sample_sheet_path", + [ + "hiseq_x_single_index_bcl2fastq_sample_sheet", + "hiseq_x_dual_index_bcl2fastq_sample_sheet", + "hiseq_2500_dual_index_bcl2fastq_sample_sheet", + "hiseq_2500_custom_index_bcl2fastq_sample_sheet", + ], + ids=[ + "hiseq_x_single_index", + "hiseq_x_dual_index", + "hiseq_2500_dual_index", + "hiseq_2500_custom_index", + ], +) def test_validate_correct_bcl2fastq_sample_sheet( cli_runner: CliRunner, - novaseq_bcl2fastq_sample_sheet_path: Path, + sample_sheet_path: Path, + request: FixtureRequest, ): """Test validate sample sheet when using a bcl2fastq sample sheet.""" # GIVEN the path to a bcl2fastq sample sheet that exists - sample_sheet: Path = novaseq_bcl2fastq_sample_sheet_path + sample_sheet: Path = request.getfixturevalue(sample_sheet_path) assert sample_sheet.exists() # GIVEN that the sample sheet is correct @@ -76,14 +92,36 @@ def test_validate_correct_bcl2fastq_sample_sheet( assert result.exit_code == EXIT_SUCCESS -def test_validate_correct_dragen_sample_sheet( +@pytest.mark.parametrize( + "sample_sheet_path", + [ + "hiseq_x_single_index_sample_sheet_path", + "hiseq_x_dual_index_sample_sheet_path", + "hiseq_2500_dual_index_sample_sheet_path", + "hiseq_2500_custom_index_sample_sheet_path", + "novaseq_6000_pre_1_5_kits_correct_sample_sheet_path", + "novaseq_6000_post_1_5_kits_correct_sample_sheet_path", + "novaseq_x_correct_sample_sheet", + ], + ids=[ + "hiseq_x_single_index", + "hiseq_x_dual_index", + "hiseq_2500_dual_index", + "hiseq_2500_custom_index", + "novaseq_6000_pre_1_5_kits", + "novaseq_6000_post_1_5_kits", + "novaseq_x", + ], +) +def test_validate_correct_v2_sample_sheet( cli_runner: CliRunner, - novaseq_bcl_convert_sample_sheet_path: Path, + sample_sheet_path: str, + 
request: FixtureRequest, ): """Test validate sample sheet when using a BCLconvert sample sheet.""" # GIVEN the path to a Bcl2fastq sample sheet that exists - sample_sheet: Path = novaseq_bcl_convert_sample_sheet_path + sample_sheet: Path = request.getfixturevalue(sample_sheet_path) assert sample_sheet.exists() # GIVEN that the sample sheet is correct diff --git a/tests/conftest.py b/tests/conftest.py index 2f0580ef39..798d9f2704 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -71,6 +71,7 @@ "tests.fixture_plugins.demultiplex_fixtures.path_fixtures", "tests.fixture_plugins.demultiplex_fixtures.run_parameters_fixtures", "tests.fixture_plugins.demultiplex_fixtures.sample_fixtures", + "tests.fixture_plugins.demultiplex_fixtures.sample_sheet_validator_fixtures", ] # Case fixtures diff --git a/tests/fixture_plugins/demultiplex_fixtures/name_fixtures.py b/tests/fixture_plugins/demultiplex_fixtures/name_fixtures.py index a39dfb12b9..9c0b6c1f8c 100644 --- a/tests/fixture_plugins/demultiplex_fixtures/name_fixtures.py +++ b/tests/fixture_plugins/demultiplex_fixtures/name_fixtures.py @@ -77,6 +77,12 @@ def bcl_convert_flow_cell_full_name() -> str: return "211101_A00187_0615_AHLG5GDRZZ" +@pytest.fixture +def novaseq_6000_post_1_5_kits_flow_cell_full_name() -> str: + """Return the full name of a NovaSeq 6000 post 1.5 kits flow cell.""" + return "230912_A00187_1009_AHK33MDRX3" + + @pytest.fixture(scope="session") def novaseq_x_flow_cell_full_name() -> str: """Return the full name of a NovaSeqX flow cell.""" diff --git a/tests/fixture_plugins/demultiplex_fixtures/override_cycles_validator_fixtures.py b/tests/fixture_plugins/demultiplex_fixtures/override_cycles_validator_fixtures.py new file mode 100644 index 0000000000..026726b2d2 --- /dev/null +++ b/tests/fixture_plugins/demultiplex_fixtures/override_cycles_validator_fixtures.py @@ -0,0 +1,87 @@ +import pytest + +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import OverrideCyclesValidator + + 
+@pytest.fixture +def hiseq_x_single_index_override_cycles_validator() -> OverrideCyclesValidator: + """Return a HiseqX single index override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=8, + run_index2_cycles=None, + is_reverse_complement=False, + ) + + +@pytest.fixture +def hiseq_x_dual_index_override_cycles_validator() -> OverrideCyclesValidator: + """Return a HiseqX dual index override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=8, + run_index2_cycles=8, + is_reverse_complement=False, + ) + + +@pytest.fixture +def hiseq_2500_dual_index_override_cycles_validator() -> OverrideCyclesValidator: + """Return a Hiseq 2500 dual index override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=101, + run_read2_cycles=101, + run_index1_cycles=8, + run_index2_cycles=8, + is_reverse_complement=False, + ) + + +@pytest.fixture +def hiseq_2500_custom_index_override_cycles_validator() -> OverrideCyclesValidator: + """Return a Hiseq 2500 custom index override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=101, + run_read2_cycles=101, + run_index1_cycles=17, + run_index2_cycles=8, + is_reverse_complement=False, + ) + + +@pytest.fixture +def novaseq_6000_pre_1_5_kits_override_cycles_validator() -> OverrideCyclesValidator: + """Return a NovaSeq 6000 pre 1.5 kits override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=10, + run_index2_cycles=10, + is_reverse_complement=False, + ) + + +@pytest.fixture +def novaseq_6000_post_1_5_kits_override_cycles_validator() -> OverrideCyclesValidator: + """Return a NovaSeq 6000 post 1.5 kits override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=10, + run_index2_cycles=10, + 
is_reverse_complement=False, + ) + + +@pytest.fixture +def novaseq_x_override_cycles_validator() -> OverrideCyclesValidator: + """Return a NovaSeqX override cycles validator.""" + return OverrideCyclesValidator( + run_read1_cycles=151, + run_read2_cycles=151, + run_index1_cycles=10, + run_index2_cycles=10, + is_reverse_complement=True, + ) diff --git a/tests/fixture_plugins/demultiplex_fixtures/path_fixtures.py b/tests/fixture_plugins/demultiplex_fixtures/path_fixtures.py index 94072188d1..35945a765a 100644 --- a/tests/fixture_plugins/demultiplex_fixtures/path_fixtures.py +++ b/tests/fixture_plugins/demultiplex_fixtures/path_fixtures.py @@ -10,6 +10,8 @@ from cg.models.flow_cell.flow_cell import FlowCellDirectoryData CORRECT_SAMPLE_SHEET: str = "CorrectSampleSheet.csv" +V2_SAMPLE_SHEET: str = "SampleSheet_bcl_convert.csv" +V1_SAMPLE_SHEET: str = "SampleSheet_bcl2fastq.csv" @pytest.fixture @@ -64,12 +66,12 @@ def flow_cell_working_directory_bcl2fastq( @pytest.fixture(name="tmp_flow_cell_directory_bclconvert") def flow_cell_working_directory_bclconvert( - bcl_convert_flow_cell_dir: Path, tmp_illumina_flow_cells_directory + novaseq_6000_post_1_5_kits_flow_cell_full_name: str, tmp_illumina_flow_cells_directory ) -> Path: """Return the path to a working directory that will be deleted after test is run. This is a path to a flow cell directory with the run parameters present. 
""" - return Path(tmp_illumina_flow_cells_directory, bcl_convert_flow_cell_dir.name) + return Path(tmp_illumina_flow_cells_directory, novaseq_6000_post_1_5_kits_flow_cell_full_name) @pytest.fixture @@ -175,21 +177,6 @@ def demultiplexed_runs_unfinished_bcl2fastq_flow_cell_directory( return Path(tmp_demultiplexed_runs_not_finished_directory, bcl2fastq_flow_cell_full_name) -@pytest.fixture -def novaseq6000_bcl_convert_sample_sheet_path() -> Path: - """Return the path to a NovaSeq 6000 BCL convert sample sheet.""" - return Path( - "tests", - "fixtures", - "apps", - "sequencing_metrics_parser", - "230622_A00621_0864_AHY7FFDRX2", - "Unaligned", - "Reports", - "SampleSheet.csv", - ) - - # Directory fixtures @@ -251,8 +238,10 @@ def illumina_demux_results_not_finished_dir(demultiplex_fixtures: Path) -> Path: @pytest.fixture -def novaseq_6000_post_1_5_kits_flow_cell_path(tmp_illumina_flow_cells_directory: Path) -> Path: - return Path(tmp_illumina_flow_cells_directory, "230912_A00187_1009_AHK33MDRX3") +def novaseq_6000_post_1_5_kits_flow_cell_path( + tmp_illumina_flow_cells_directory: Path, novaseq_6000_post_1_5_kits_flow_cell_full_name: str +) -> Path: + return Path(tmp_illumina_flow_cells_directory, novaseq_6000_post_1_5_kits_flow_cell_full_name) @pytest.fixture @@ -294,18 +283,42 @@ def novaseq_x_manifest_file(novaseq_x_flow_cell_dir: Path) -> Path: def hiseq_x_single_index_flow_cell_dir( illumina_flow_cells_directory, hiseq_x_single_index_flow_cell_name: str ) -> Path: - """Return the path to a HiSeqX flow cell.""" + """Return the path to a single index HiSeqX flow cell.""" return Path(illumina_flow_cells_directory, hiseq_x_single_index_flow_cell_name) +@pytest.fixture(scope="session") +def hiseq_x_single_index_sample_sheet_path(hiseq_x_single_index_flow_cell_dir: Path) -> Path: + """Return the path to a single index HiSeqX v2 sample sheet.""" + return Path(hiseq_x_single_index_flow_cell_dir, V2_SAMPLE_SHEET) + + +@pytest.fixture(scope="session") +def 
hiseq_x_single_index_bcl2fastq_sample_sheet(hiseq_x_single_index_flow_cell_dir: Path) -> Path: + """Return the path to a single index HiSeqX Bcl2fastq sample sheet.""" + return Path(hiseq_x_single_index_flow_cell_dir, V1_SAMPLE_SHEET) + + @pytest.fixture(scope="session") def hiseq_x_dual_index_flow_cell_dir( illumina_flow_cells_directory, hiseq_x_dual_index_flow_cell_name: str ) -> Path: - """Return the path to a HiSeqX flow cell.""" + """Return the path to a dual index HiSeqX flow cell.""" return Path(illumina_flow_cells_directory, hiseq_x_dual_index_flow_cell_name) +@pytest.fixture(scope="session") +def hiseq_x_dual_index_sample_sheet_path(hiseq_x_dual_index_flow_cell_dir: Path) -> Path: + """Return the path to a dual index HiSeqX v2 sample sheet.""" + return Path(hiseq_x_dual_index_flow_cell_dir, V2_SAMPLE_SHEET) + + +@pytest.fixture(scope="session") +def hiseq_x_dual_index_bcl2fastq_sample_sheet(hiseq_x_dual_index_flow_cell_dir: Path) -> Path: + """Return the path to a dual index HiSeqX Bcl2fastq sample sheet.""" + return Path(hiseq_x_dual_index_flow_cell_dir, V1_SAMPLE_SHEET) + + @pytest.fixture(scope="session") def hiseq_2500_dual_index_flow_cell_dir( illumina_flow_cells_directory, hiseq_2500_dual_index_flow_cell_name: str @@ -314,6 +327,18 @@ def hiseq_2500_dual_index_flow_cell_dir( return Path(illumina_flow_cells_directory, hiseq_2500_dual_index_flow_cell_name) +@pytest.fixture(scope="session") +def hiseq_2500_dual_index_sample_sheet_path(hiseq_2500_dual_index_flow_cell_dir: Path) -> Path: + """Return the path to a dual index HiSeq2500 v2 sample sheet.""" + return Path(hiseq_2500_dual_index_flow_cell_dir, V2_SAMPLE_SHEET) + + +@pytest.fixture(scope="session") +def hiseq_2500_dual_index_bcl2fastq_sample_sheet(hiseq_2500_dual_index_flow_cell_dir: Path) -> Path: + """Return the path to a dual index HiSeq2500 Bcl2fastq sample sheet.""" + return Path(hiseq_2500_dual_index_flow_cell_dir, V1_SAMPLE_SHEET) + + @pytest.fixture(scope="session") def 
hiseq_2500_custom_index_flow_cell_dir( illumina_flow_cells_directory, hiseq_2500_custom_index_flow_cell_name: str @@ -322,6 +347,20 @@ def hiseq_2500_custom_index_flow_cell_dir( return Path(illumina_flow_cells_directory, hiseq_2500_custom_index_flow_cell_name) +@pytest.fixture(scope="session") +def hiseq_2500_custom_index_sample_sheet_path(hiseq_2500_custom_index_flow_cell_dir: Path) -> Path: + """Return the path to a custom index HiSeq2500 v2 sample sheet.""" + return Path(hiseq_2500_custom_index_flow_cell_dir, V2_SAMPLE_SHEET) + + +@pytest.fixture(scope="session") +def hiseq_2500_custom_index_bcl2fastq_sample_sheet( + hiseq_2500_custom_index_flow_cell_dir: Path, +) -> Path: + """Return the path to a custom index HiSeq2500 Bcl2fastq sample sheet.""" + return Path(hiseq_2500_custom_index_flow_cell_dir, V1_SAMPLE_SHEET) + + @pytest.fixture def novaseq_x_flow_cell_dir(illumina_flow_cells_directory: Path) -> Path: """Return the path to a NovaSeqX flow cell.""" @@ -344,18 +383,6 @@ def bcl_convert_flow_cell_dir( return Path(illumina_flow_cells_directory, bcl_convert_flow_cell_full_name) -@pytest.fixture(scope="session") -def novaseq_bcl2fastq_sample_sheet_path(bcl2fastq_flow_cell_dir: Path) -> Path: - """Return the path to a NovaSeq6000 Bcl2fastq sample sheet.""" - return Path(bcl2fastq_flow_cell_dir, DemultiplexingDirsAndFiles.SAMPLE_SHEET_FILE_NAME) - - -@pytest.fixture(scope="session") -def novaseq_bcl_convert_sample_sheet_path(bcl_convert_flow_cell_dir: Path) -> Path: - """Return the path to a NovaSeq6000 bcl_convert sample sheet.""" - return Path(bcl_convert_flow_cell_dir, DemultiplexingDirsAndFiles.SAMPLE_SHEET_FILE_NAME) - - @pytest.fixture(scope="session") def run_parameters_wrong_instrument(run_parameters_dir: Path) -> Path: """Return a NovaSeqX run parameters file path with a wrong instrument value.""" diff --git a/tests/fixture_plugins/demultiplex_fixtures/sample_sheet_validator_fixtures.py 
b/tests/fixture_plugins/demultiplex_fixtures/sample_sheet_validator_fixtures.py new file mode 100644 index 0000000000..4683c69848 --- /dev/null +++ b/tests/fixture_plugins/demultiplex_fixtures/sample_sheet_validator_fixtures.py @@ -0,0 +1,93 @@ +from pathlib import Path + +import pytest + +from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator + + +@pytest.fixture +def hiseq_x_single_index_sample_sheet_validator( + hiseq_x_single_index_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a HiseqX single index sample sheet validator.""" + return SampleSheetValidator(path=hiseq_x_single_index_sample_sheet_path) + + +@pytest.fixture +def hiseq_x_single_index_bcl2fastq_sample_sheet_validator( + hiseq_x_single_index_bcl2fastq_sample_sheet: Path, +) -> SampleSheetValidator: + """Return a HiseqX single index Bcl2fastq sample sheet validator.""" + return SampleSheetValidator(path=hiseq_x_single_index_bcl2fastq_sample_sheet) + + +@pytest.fixture +def hiseq_x_dual_index_sample_sheet_validator( + hiseq_x_dual_index_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a HiseqX dual index sample sheet validator.""" + return SampleSheetValidator(path=hiseq_x_dual_index_sample_sheet_path) + + +@pytest.fixture +def hiseq_x_dual_index_bcl2fastq_sample_sheet_validator( + hiseq_x_dual_index_bcl2fastq_sample_sheet: Path, +) -> SampleSheetValidator: + """Return a HiseqX dual index Bcl2fastq sample sheet validator.""" + return SampleSheetValidator(path=hiseq_x_dual_index_bcl2fastq_sample_sheet) + + +@pytest.fixture +def hiseq_2500_dual_index_sample_sheet_validator( + hiseq_2500_dual_index_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a Hiseq 2500 dual index sample sheet validator.""" + return SampleSheetValidator(path=hiseq_2500_dual_index_sample_sheet_path) + + +@pytest.fixture +def hiseq_2500_dual_index_bcl2fastq_sample_sheet_validator( + hiseq_2500_dual_index_bcl2fastq_sample_sheet: Path, +) -> SampleSheetValidator: +
"""Return a Hiseq 2500 dual index Bcl2fastq sample sheet validator.""" + return SampleSheetValidator(path=hiseq_2500_dual_index_bcl2fastq_sample_sheet) + + +@pytest.fixture +def hiseq_2500_custom_index_sample_sheet_validator( + hiseq_2500_custom_index_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a Hiseq 2500 custom index sample sheet validator.""" + return SampleSheetValidator(path=hiseq_2500_custom_index_sample_sheet_path) + + +@pytest.fixture +def hiseq_2500_custom_index_bcl2fastq_sample_sheet_validator( + hiseq_2500_custom_index_bcl2fastq_sample_sheet: Path, +) -> SampleSheetValidator: + """Return a Hiseq 2500 custom index Bcl2fastq sample sheet validator.""" + return SampleSheetValidator(path=hiseq_2500_custom_index_bcl2fastq_sample_sheet) + + +@pytest.fixture +def novaseq_6000_pre_1_5_kits_sample_sheet_validator( + novaseq_6000_pre_1_5_kits_correct_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a NovaSeq 6000 pre 1.5 kits sample sheet validator.""" + return SampleSheetValidator(path=novaseq_6000_pre_1_5_kits_correct_sample_sheet_path) + + +@pytest.fixture +def novaseq_6000_post_1_5_kits_sample_sheet_validator( + novaseq_6000_post_1_5_kits_correct_sample_sheet_path: Path, +) -> SampleSheetValidator: + """Return a NovaSeq 6000 post 1.5 kits sample sheet validator.""" + return SampleSheetValidator(path=novaseq_6000_post_1_5_kits_correct_sample_sheet_path) + + +@pytest.fixture +def novaseq_x_sample_sheet_validator( + novaseq_x_correct_sample_sheet: Path, +) -> SampleSheetValidator: + """Return a NovaSeq X sample sheet validator.""" + return SampleSheetValidator(path=novaseq_x_correct_sample_sheet) diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/180509_D00450_0598_BHGYFNBCX2/SampleSheet_dragen.csv b/tests/fixtures/apps/demultiplexing/flow_cells/180509_D00450_0598_BHGYFNBCX2/SampleSheet_bcl_convert.csv similarity index 100% rename from tests/fixtures/apps/demultiplexing/flow_cells/180509_D00450_0598_BHGYFNBCX2/SampleSheet_dragen.csv
rename to tests/fixtures/apps/demultiplexing/flow_cells/180509_D00450_0598_BHGYFNBCX2/SampleSheet_bcl_convert.csv diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/181005_D00410_0735_BHM2LNBCX2/SampleSheet_dragen.csv b/tests/fixtures/apps/demultiplexing/flow_cells/181005_D00410_0735_BHM2LNBCX2/SampleSheet_bcl_convert.csv similarity index 100% rename from tests/fixtures/apps/demultiplexing/flow_cells/181005_D00410_0735_BHM2LNBCX2/SampleSheet_dragen.csv rename to tests/fixtures/apps/demultiplexing/flow_cells/181005_D00410_0735_BHM2LNBCX2/SampleSheet_bcl_convert.csv diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/SampleSheet.csv b/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/SampleSheet.csv index ee0455210b..4868971fc9 100644 --- a/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/SampleSheet.csv +++ b/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/SampleSheet.csv @@ -3,6 +3,7 @@ FileFormatVersion,2 RunName,HK33MDRX3 InstrumentPlatform,NovaSeq6000 IndexOrientation,Forward +IndexSettings,NovaSeq6000Post1.5Kits [Reads] Read1Cycles,151 Read2Cycles,151 @@ -12,84 +13,84 @@ Index2Cycles,10 SoftwareVersion,4.1.7 FastqCompressionFormat,gzip [BCLConvert_Data] -Lane,Sample_ID,Index,Index2,OverrideCycles,AdapterRead1,AdapterRead2,BarcodeMismatchesIndex1,BarcodeMismatchesIndex2 -1,ACC12642A7,ACCGGTCAAG,TCTTCACCTT,Y151;I10;I10;Y151,,,1,1 -1,ACC12642A4,TCCACTGGAG,CTGTTCCGGT,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A1,GACCACTCGA,AGAGTTGGAT,Y151;I10;I10;Y151,,,1,1 -1,ACC12642A2,ACTTGGCTTC,TGGCTCTATT,Y151;I10;I10;Y151,,,1,1 -1,ACC12642A3,AATGTAACCG,GACTGCAGCA,Y151;I10;I10;Y151,,,1,0 -1,ACC12637A5,TCAGTCGGCT,CCTGGTACAA,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A9,ATAGATGCCT,AGAGTCTGAG,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A3,AGGAGTCACT,ACCTTCGAGT,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A12,GACAGCTGTA,TGATCCTGAT,Y151;I10;I10;Y151,,,1,1 
-1,ACC12637A10,TATCCTTCGC,CAGCCACATC,Y151;I10;I10;Y151,,,1,1 -1,ACC12642A5,TGCCTCAGAG,TCTAGCAACG,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A2,CCTAGAGATA,TCTGCCATAT,Y151;I10;I10;Y151,,,1,1 -1,ACC12642A1,ACTAAGCATC,CTGGCATGTG,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A4,ACCTCCGCTA,AACCTTCAAG,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A6,GATAGAGGTA,ACAGCTTCCA,Y151;I10;I10;Y151,,,1,1 -1,ACC12637A7,ACACACTTCG,GCAGTTAACA,Y151;I10;I10;Y151,,,1,1 -1,ACC12641A2,TGGAGTAGAC,CCGTTCACCT,Y151;I10;I10;Y151,,,1,1 -1,ACC12641A1,CAAGAACGAA,AGCCTTGAAT,Y151;I10;I10;Y151,,,1,1 -1,ACC12636A2,TCCAACCAGA,TTCTTAGTCG,Y151;I10;I10;Y151,,,1,1 -1,ACC12616A3,TGTTCACACG,TTCTTACGAC,Y151;I10;I10;Y151,,,1,1 -1,ACC12661A1,GTGTAACCGC,TCTTCTCTGT,Y151;I10;I10;Y151,,,1,1 -1,ACC12661A2,GCTAGCCTTC,ATCCGAGAAT,Y151;I10;I10;Y151,,,1,1 -1,ACC10479A88,ACTGTAATGG,AGAGAGACAA,Y151;I10;I10;Y151,,,1,1 -1,ACC12661A3,TCCATGGCAC,ACTAGCACGC,Y151;I10;I10;Y151,,,1,1 -1,ACC12661A4,GAACGTAAGA,AGCTGGTAGG,Y151;I10;I10;Y151,,,1,1 -1,ACC12663A1,CATTGCAACA,CACAATTCGG,Y151;I10;I10;Y151,,,1,1 -1,ACC12663A2,CGCTCTTATA,CCGTTGCAAT,Y151;I10;I10;Y151,,,1,1 -1,ACC12663A3,CTGCAGGAAG,TAACGTAGAG,Y151;I10;I10;Y151,,,1,1 -1,ACC12663A4,ACGTTCACCA,GTACCGATCG,Y151;I10;I10;Y151,,,1,1 -1,ACC11193A63,CTAACCGAGA,AAGATCTTGC,Y151;I10;I10;Y151,,,1,1 -1,ACC12659A1,TATCATAGGC,GACTAGCTTG,Y151;I10;I10;Y151,,,1,1 -1,ACC12659A2,CCACCTTCAA,CAACGATGAG,Y151;I10;I10;Y151,,,1,1 -1,ACC12659A3,CAAGAAGCGC,CTCGAGATAA,Y151;I10;I10;Y151,,,1,1 -1,ACC12659A4,CATTGTTCAC,TAGACATGAG,Y151;I10;I10;Y151,,,1,1 -1,ACC12659A5,CTCGCCTAAC,GAGTGTTCTT,Y151;I10;I10;Y151,,,1,1 -1,ACC12649A2,TCTCTACT,GAACCGCG,Y151;I8N2;I8N2;Y151,,,1,1 -1,ACC12650A4,GGCTTAAG,GGTCACGA,Y151;I8N2;I8N2;Y151,,,1,1 -1,ACC12650A5,AATCCGGA,AACTGTAG,Y151;I8N2;I8N2;Y151,,,1,0 -1,ACC12650A6,TAATACAG,GTGAATAT,Y151;I8N2;I8N2;Y151,,,1,1 -1,ACC12631A1,TACCGAGG,AGTTCAGG,Y151;I8N2;I8N2;Y151,,,1,1 -2,ACC12642A7,ACCGGTCAAG,TCTTCACCTT,Y151;I10;I10;Y151,,,1,1 -2,ACC12642A4,TCCACTGGAG,CTGTTCCGGT,Y151;I10;I10;Y151,,,1,1 
-2,ACC12637A1,GACCACTCGA,AGAGTTGGAT,Y151;I10;I10;Y151,,,1,1 -2,ACC12642A2,ACTTGGCTTC,TGGCTCTATT,Y151;I10;I10;Y151,,,1,1 -2,ACC12642A3,AATGTAACCG,GACTGCAGCA,Y151;I10;I10;Y151,,,1,0 -2,ACC12637A5,TCAGTCGGCT,CCTGGTACAA,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A9,ATAGATGCCT,AGAGTCTGAG,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A3,AGGAGTCACT,ACCTTCGAGT,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A12,GACAGCTGTA,TGATCCTGAT,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A10,TATCCTTCGC,CAGCCACATC,Y151;I10;I10;Y151,,,1,1 -2,ACC12642A5,TGCCTCAGAG,TCTAGCAACG,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A2,CCTAGAGATA,TCTGCCATAT,Y151;I10;I10;Y151,,,1,1 -2,ACC12642A1,ACTAAGCATC,CTGGCATGTG,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A4,ACCTCCGCTA,AACCTTCAAG,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A6,GATAGAGGTA,ACAGCTTCCA,Y151;I10;I10;Y151,,,1,1 -2,ACC12637A7,ACACACTTCG,GCAGTTAACA,Y151;I10;I10;Y151,,,1,1 -2,ACC12641A2,TGGAGTAGAC,CCGTTCACCT,Y151;I10;I10;Y151,,,1,1 -2,ACC12641A1,CAAGAACGAA,AGCCTTGAAT,Y151;I10;I10;Y151,,,1,1 -2,ACC12636A2,TCCAACCAGA,TTCTTAGTCG,Y151;I10;I10;Y151,,,1,1 -2,ACC12616A3,TGTTCACACG,TTCTTACGAC,Y151;I10;I10;Y151,,,1,1 -2,ACC12661A1,GTGTAACCGC,TCTTCTCTGT,Y151;I10;I10;Y151,,,1,1 -2,ACC12661A2,GCTAGCCTTC,ATCCGAGAAT,Y151;I10;I10;Y151,,,1,1 -2,ACC10479A88,ACTGTAATGG,AGAGAGACAA,Y151;I10;I10;Y151,,,1,1 -2,ACC12661A3,TCCATGGCAC,ACTAGCACGC,Y151;I10;I10;Y151,,,1,1 -2,ACC12661A4,GAACGTAAGA,AGCTGGTAGG,Y151;I10;I10;Y151,,,1,1 -2,ACC12663A1,CATTGCAACA,CACAATTCGG,Y151;I10;I10;Y151,,,1,1 -2,ACC12663A2,CGCTCTTATA,CCGTTGCAAT,Y151;I10;I10;Y151,,,1,1 -2,ACC12663A3,CTGCAGGAAG,TAACGTAGAG,Y151;I10;I10;Y151,,,1,1 -2,ACC12663A4,ACGTTCACCA,GTACCGATCG,Y151;I10;I10;Y151,,,1,1 -2,ACC11193A63,CTAACCGAGA,AAGATCTTGC,Y151;I10;I10;Y151,,,1,1 -2,ACC12659A1,TATCATAGGC,GACTAGCTTG,Y151;I10;I10;Y151,,,1,1 -2,ACC12659A2,CCACCTTCAA,CAACGATGAG,Y151;I10;I10;Y151,,,1,1 -2,ACC12659A3,CAAGAAGCGC,CTCGAGATAA,Y151;I10;I10;Y151,,,1,1 -2,ACC12659A4,CATTGTTCAC,TAGACATGAG,Y151;I10;I10;Y151,,,1,1 -2,ACC12659A5,CTCGCCTAAC,GAGTGTTCTT,Y151;I10;I10;Y151,,,1,1 
-2,ACC12649A2,TCTCTACT,GAACCGCG,Y151;I8N2;I8N2;Y151,,,1,1 -2,ACC12650A4,GGCTTAAG,GGTCACGA,Y151;I8N2;I8N2;Y151,,,1,1 -2,ACC12650A5,AATCCGGA,AACTGTAG,Y151;I8N2;I8N2;Y151,,,1,0 -2,ACC12650A6,TAATACAG,GTGAATAT,Y151;I8N2;I8N2;Y151,,,1,1 -2,ACC12631A1,TACCGAGG,AGTTCAGG,Y151;I8N2;I8N2;Y151,,,1,1 +Lane,Sample_ID,Index,Index2,OverrideCycles,BarcodeMismatchesIndex1,BarcodeMismatchesIndex2 +1,ACC12642A7,ACCGGTCAAG,TCTTCACCTT,Y151;I10;I10;Y151,1,1 +1,ACC12642A4,TCCACTGGAG,CTGTTCCGGT,Y151;I10;I10;Y151,1,1 +1,ACC12637A1,GACCACTCGA,AGAGTTGGAT,Y151;I10;I10;Y151,1,1 +1,ACC12642A2,ACTTGGCTTC,TGGCTCTATT,Y151;I10;I10;Y151,1,1 +1,ACC12642A3,AATGTAACCG,GACTGCAGCA,Y151;I10;I10;Y151,1,0 +1,ACC12637A5,TCAGTCGGCT,CCTGGTACAA,Y151;I10;I10;Y151,1,1 +1,ACC12637A9,ATAGATGCCT,AGAGTCTGAG,Y151;I10;I10;Y151,1,1 +1,ACC12637A3,AGGAGTCACT,ACCTTCGAGT,Y151;I10;I10;Y151,1,1 +1,ACC12637A12,GACAGCTGTA,TGATCCTGAT,Y151;I10;I10;Y151,1,1 +1,ACC12637A10,TATCCTTCGC,CAGCCACATC,Y151;I10;I10;Y151,1,1 +1,ACC12642A5,TGCCTCAGAG,TCTAGCAACG,Y151;I10;I10;Y151,1,1 +1,ACC12637A2,CCTAGAGATA,TCTGCCATAT,Y151;I10;I10;Y151,1,1 +1,ACC12642A1,ACTAAGCATC,CTGGCATGTG,Y151;I10;I10;Y151,1,1 +1,ACC12637A4,ACCTCCGCTA,AACCTTCAAG,Y151;I10;I10;Y151,1,1 +1,ACC12637A6,GATAGAGGTA,ACAGCTTCCA,Y151;I10;I10;Y151,1,1 +1,ACC12637A7,ACACACTTCG,GCAGTTAACA,Y151;I10;I10;Y151,1,1 +1,ACC12641A2,TGGAGTAGAC,CCGTTCACCT,Y151;I10;I10;Y151,1,1 +1,ACC12641A1,CAAGAACGAA,AGCCTTGAAT,Y151;I10;I10;Y151,1,1 +1,ACC12636A2,TCCAACCAGA,TTCTTAGTCG,Y151;I10;I10;Y151,1,1 +1,ACC12616A3,TGTTCACACG,TTCTTACGAC,Y151;I10;I10;Y151,1,1 +1,ACC12661A1,GTGTAACCGC,TCTTCTCTGT,Y151;I10;I10;Y151,1,1 +1,ACC12661A2,GCTAGCCTTC,ATCCGAGAAT,Y151;I10;I10;Y151,1,1 +1,ACC10479A88,ACTGTAATGG,AGAGAGACAA,Y151;I10;I10;Y151,1,1 +1,ACC12661A3,TCCATGGCAC,ACTAGCACGC,Y151;I10;I10;Y151,1,1 +1,ACC12661A4,GAACGTAAGA,AGCTGGTAGG,Y151;I10;I10;Y151,1,1 +1,ACC12663A1,CATTGCAACA,CACAATTCGG,Y151;I10;I10;Y151,1,1 +1,ACC12663A2,CGCTCTTATA,CCGTTGCAAT,Y151;I10;I10;Y151,1,1 
+1,ACC12663A3,CTGCAGGAAG,TAACGTAGAG,Y151;I10;I10;Y151,1,1 +1,ACC12663A4,ACGTTCACCA,GTACCGATCG,Y151;I10;I10;Y151,1,1 +1,ACC11193A63,CTAACCGAGA,AAGATCTTGC,Y151;I10;I10;Y151,1,1 +1,ACC12659A1,TATCATAGGC,GACTAGCTTG,Y151;I10;I10;Y151,1,1 +1,ACC12659A2,CCACCTTCAA,CAACGATGAG,Y151;I10;I10;Y151,1,1 +1,ACC12659A3,CAAGAAGCGC,CTCGAGATAA,Y151;I10;I10;Y151,1,1 +1,ACC12659A4,CATTGTTCAC,TAGACATGAG,Y151;I10;I10;Y151,1,1 +1,ACC12659A5,CTCGCCTAAC,GAGTGTTCTT,Y151;I10;I10;Y151,1,1 +1,ACC12649A2,TCTCTACT,GAACCGCG,Y151;I8N2;I8N2;Y151,1,1 +1,ACC12650A4,GGCTTAAG,GGTCACGA,Y151;I8N2;I8N2;Y151,1,1 +1,ACC12650A5,AATCCGGA,AACTGTAG,Y151;I8N2;I8N2;Y151,1,0 +1,ACC12650A6,TAATACAG,GTGAATAT,Y151;I8N2;I8N2;Y151,1,1 +1,ACC12631A1,TACCGAGG,AGTTCAGG,Y151;I8N2;I8N2;Y151,1,1 +2,ACC12642A7,ACCGGTCAAG,TCTTCACCTT,Y151;I10;I10;Y151,1,1 +2,ACC12642A4,TCCACTGGAG,CTGTTCCGGT,Y151;I10;I10;Y151,1,1 +2,ACC12637A1,GACCACTCGA,AGAGTTGGAT,Y151;I10;I10;Y151,1,1 +2,ACC12642A2,ACTTGGCTTC,TGGCTCTATT,Y151;I10;I10;Y151,1,1 +2,ACC12642A3,AATGTAACCG,GACTGCAGCA,Y151;I10;I10;Y151,1,0 +2,ACC12637A5,TCAGTCGGCT,CCTGGTACAA,Y151;I10;I10;Y151,1,1 +2,ACC12637A9,ATAGATGCCT,AGAGTCTGAG,Y151;I10;I10;Y151,1,1 +2,ACC12637A3,AGGAGTCACT,ACCTTCGAGT,Y151;I10;I10;Y151,1,1 +2,ACC12637A12,GACAGCTGTA,TGATCCTGAT,Y151;I10;I10;Y151,1,1 +2,ACC12637A10,TATCCTTCGC,CAGCCACATC,Y151;I10;I10;Y151,1,1 +2,ACC12642A5,TGCCTCAGAG,TCTAGCAACG,Y151;I10;I10;Y151,1,1 +2,ACC12637A2,CCTAGAGATA,TCTGCCATAT,Y151;I10;I10;Y151,1,1 +2,ACC12642A1,ACTAAGCATC,CTGGCATGTG,Y151;I10;I10;Y151,1,1 +2,ACC12637A4,ACCTCCGCTA,AACCTTCAAG,Y151;I10;I10;Y151,1,1 +2,ACC12637A6,GATAGAGGTA,ACAGCTTCCA,Y151;I10;I10;Y151,1,1 +2,ACC12637A7,ACACACTTCG,GCAGTTAACA,Y151;I10;I10;Y151,1,1 +2,ACC12641A2,TGGAGTAGAC,CCGTTCACCT,Y151;I10;I10;Y151,1,1 +2,ACC12641A1,CAAGAACGAA,AGCCTTGAAT,Y151;I10;I10;Y151,1,1 +2,ACC12636A2,TCCAACCAGA,TTCTTAGTCG,Y151;I10;I10;Y151,1,1 +2,ACC12616A3,TGTTCACACG,TTCTTACGAC,Y151;I10;I10;Y151,1,1 +2,ACC12661A1,GTGTAACCGC,TCTTCTCTGT,Y151;I10;I10;Y151,1,1 
+2,ACC12661A2,GCTAGCCTTC,ATCCGAGAAT,Y151;I10;I10;Y151,1,1 +2,ACC10479A88,ACTGTAATGG,AGAGAGACAA,Y151;I10;I10;Y151,1,1 +2,ACC12661A3,TCCATGGCAC,ACTAGCACGC,Y151;I10;I10;Y151,1,1 +2,ACC12661A4,GAACGTAAGA,AGCTGGTAGG,Y151;I10;I10;Y151,1,1 +2,ACC12663A1,CATTGCAACA,CACAATTCGG,Y151;I10;I10;Y151,1,1 +2,ACC12663A2,CGCTCTTATA,CCGTTGCAAT,Y151;I10;I10;Y151,1,1 +2,ACC12663A3,CTGCAGGAAG,TAACGTAGAG,Y151;I10;I10;Y151,1,1 +2,ACC12663A4,ACGTTCACCA,GTACCGATCG,Y151;I10;I10;Y151,1,1 +2,ACC11193A63,CTAACCGAGA,AAGATCTTGC,Y151;I10;I10;Y151,1,1 +2,ACC12659A1,TATCATAGGC,GACTAGCTTG,Y151;I10;I10;Y151,1,1 +2,ACC12659A2,CCACCTTCAA,CAACGATGAG,Y151;I10;I10;Y151,1,1 +2,ACC12659A3,CAAGAAGCGC,CTCGAGATAA,Y151;I10;I10;Y151,1,1 +2,ACC12659A4,CATTGTTCAC,TAGACATGAG,Y151;I10;I10;Y151,1,1 +2,ACC12659A5,CTCGCCTAAC,GAGTGTTCTT,Y151;I10;I10;Y151,1,1 +2,ACC12649A2,TCTCTACT,GAACCGCG,Y151;I8N2;I8N2;Y151,1,1 +2,ACC12650A4,GGCTTAAG,GGTCACGA,Y151;I8N2;I8N2;Y151,1,1 +2,ACC12650A5,AATCCGGA,AACTGTAG,Y151;I8N2;I8N2;Y151,1,0 +2,ACC12650A6,TAATACAG,GTGAATAT,Y151;I8N2;I8N2;Y151,1,1 +2,ACC12631A1,TACCGAGG,AGTTCAGG,Y151;I8N2;I8N2;Y151,1,1 diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/demuxstarted.txt b/tests/fixtures/apps/demultiplexing/flow_cells/230912_A00187_1009_AHK33MDRX3/demuxstarted.txt deleted file mode 100644 index e69de29bb2..0000000000