From 8524a64767ec07fb011999f23348389b6d348874 Mon Sep 17 00:00:00 2001 From: Sebastian Diaz Date: Thu, 30 Nov 2023 10:19:21 +0100 Subject: [PATCH] feat - add HiSeq RunParameters file parser (#2653)(MINOR) Closes #2651. Creates a child class of `RunParameters` that reads files from HiSeq sequencers, both 2500 and X. The purpose is to know the number of read and index-read cycles of the sequencing run. ### Added - Class `RunParametersHiSeq`, implementing abstract methods from parent. - Constants to parse the elements from the XML file - XMLError exception - Test for new class ### Changed - Moved and renamed function `node_not_found` in `RunParameters` class to `cg/io/xml.py:validate_node_exists` - Replaced the RunParametersError exception with XMLError in the validation of the nodes. ### Fixed - Removed unused sample sheets in fixtures --- cg/apps/demultiplex/sample_sheet/create.py | 4 +- cg/apps/demultiplex/sample_sheet/index.py | 30 +- .../sample_sheet/sample_sheet_creator.py | 39 ++- cg/cli/demultiplex/sample_sheet.py | 8 +- cg/constants/demultiplexing.py | 17 +- cg/constants/sequencing.py | 2 +- cg/exc.py | 4 + cg/io/xml.py | 31 +- cg/models/demultiplex/run_parameters.py | 199 ++++++++---- cg/models/flow_cell/flow_cell.py | 47 +-- tests/apps/demultiplex/conftest.py | 4 +- .../demultiplex/test_sample_sheet_creator.py | 10 +- .../demultiplex/test_create_sample_sheet.py | 3 +- tests/conftest.py | 138 ++++++-- .../apps/demultiplexing/SampleSheet.csv | 0 .../SampleSheet.csv | 10 + .../runParameters.xml | 125 +++++++ .../RunParameters.xml | 136 ++++++++ .../SampleSheet.csv | 37 +++ .../demultiplexing/flow_cells/SampleSheet.csv | 0 ...ters_hiseq_2500_different_index_cycles.xml | 137 ++++++++ ...rs_novaseq_6000_different_index_cycles.xml | 8 - ...eters_novaseq_X_different_index_cycles.xml | 1 - ...nParameters_novaseq_X_wrong_instrument.xml | 65 ++++ ...ovaseq_no_software_nor_reagent_version.xml | 10 - tests/meta/demultiplex/conftest.py | 4 +- tests/models/demultiplexing/conftest.py 
| 2 +- .../demultiplexing/test_run_parameters.py | 304 +++++++++++------- tests/models/flow_cell/test_flowcell_model.py | 77 ++++- 29 files changed, 1133 insertions(+), 319 deletions(-) delete mode 100644 tests/fixtures/apps/demultiplexing/SampleSheet.csv create mode 100644 tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/SampleSheet.csv create mode 100644 tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/runParameters.xml create mode 100644 tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/RunParameters.xml create mode 100644 tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/SampleSheet.csv delete mode 100644 tests/fixtures/apps/demultiplexing/flow_cells/SampleSheet.csv create mode 100644 tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_hiseq_2500_different_index_cycles.xml create mode 100755 tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_wrong_instrument.xml diff --git a/cg/apps/demultiplex/sample_sheet/create.py b/cg/apps/demultiplex/sample_sheet/create.py index 08024a7de5..dae433a2d1 100644 --- a/cg/apps/demultiplex/sample_sheet/create.py +++ b/cg/apps/demultiplex/sample_sheet/create.py @@ -17,7 +17,7 @@ def get_sample_sheet_creator( lims_samples: list[FlowCellSample], force: bool, ) -> SampleSheetCreator: - """Returns an initialised sample sheet creator according to the software used for demultiplexing.""" + """Returns an initialised sample sheet creator according to the demultiplexing software.""" if flow_cell.bcl_converter == BclConverter.BCL2FASTQ: return SampleSheetCreatorBcl2Fastq( flow_cell=flow_cell, lims_samples=lims_samples, force=force @@ -31,7 +31,7 @@ def create_sample_sheet( force: bool = False, ) -> list[list[str]]: """Create a sample sheet for a flow cell.""" - sample_sheet_creator = get_sample_sheet_creator( + sample_sheet_creator: SampleSheetCreator = get_sample_sheet_creator( 
flow_cell=flow_cell, lims_samples=lims_samples, force=force, diff --git a/cg/apps/demultiplex/sample_sheet/index.py b/cg/apps/demultiplex/sample_sheet/index.py index 68d64b625c..e7d9a1c3e7 100644 --- a/cg/apps/demultiplex/sample_sheet/index.py +++ b/cg/apps/demultiplex/sample_sheet/index.py @@ -58,7 +58,7 @@ def get_valid_indexes(dual_indexes_only: bool = True) -> list[Index]: def get_reagent_kit_version(reagent_kit_version: str) -> str: """Derives the reagent kit version from the run parameters.""" - LOG.info(f"Converting reagent kit parameter {reagent_kit_version} to version") + LOG.debug(f"Converting reagent kit parameter {reagent_kit_version} to version") if reagent_kit_version not in REAGENT_KIT_PARAMETER_TO_VERSION: raise SyntaxError(f"Unknown reagent kit version {reagent_kit_version}") @@ -69,8 +69,8 @@ def get_index_pair(sample: FlowCellSample) -> tuple[str, str]: """Returns a sample index separated into index 1 and index 2.""" if is_dual_index(sample.index): index_1, index_2 = sample.index.split("-") - return index_1.strip(), index_2.strip() - return sample.index, sample.index2 + return index_1.strip().replace("NNNNNNNNN", ""), index_2.strip() + return sample.index.replace("NNNNNNNNN", ""), sample.index2 def is_reverse_complement_needed(run_parameters: RunParameters) -> bool: @@ -78,13 +78,13 @@ def is_reverse_complement_needed(run_parameters: RunParameters) -> bool: If the run used the new NovaSeq control software version (NEW_CONTROL_SOFTWARE_VERSION) and the new reagent kit version (NEW_REAGENT_KIT_VERSION), then it requires reverse complement. - If the run is NovaSeqX, does not require reverse complement. + If the run is NovaSeqX, HiSeqX or HiSeq2500, does not require reverse complement. 
""" - if run_parameters.sequencer == Sequencers.NOVASEQX: + if run_parameters.sequencer != Sequencers.NOVASEQ: return False control_software_version: str = run_parameters.control_software_version reagent_kit_version: str = run_parameters.reagent_kit_version - LOG.info("Check if run is reverse complement") + LOG.debug("Check if run is reverse complement") if version.parse(version=control_software_version) < version.parse( version=NEW_CONTROL_SOFTWARE_VERSION ): @@ -98,7 +98,7 @@ def is_reverse_complement_needed(run_parameters: RunParameters) -> bool: f"Reagent kit version {reagent_kit_version} does not does not need reverse complement" ) return False - LOG.info("Run is reverse complement") + LOG.debug("Run is reverse complement") return True @@ -221,11 +221,17 @@ def update_indexes_for_samples( samples: list[FlowCellSampleBCLConvert | FlowCellSampleBcl2Fastq], index_cycles: int, is_reverse_complement: bool, + sequencer: str, ) -> None: """Updates the values to the fields index1 and index 2 of samples.""" for sample in samples: - pad_and_reverse_complement_sample_indexes( - sample=sample, - index_cycles=index_cycles, - is_reverse_complement=is_reverse_complement, - ) + if sequencer != Sequencers.NOVASEQ: + index1, index2 = get_index_pair(sample=sample) + sample.index = index1 + sample.index2 = index2 + else: + pad_and_reverse_complement_sample_indexes( + sample=sample, + index_cycles=index_cycles, + is_reverse_complement=is_reverse_complement, + ) diff --git a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py index 1744b3c1a6..f931057783 100644 --- a/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py +++ b/cg/apps/demultiplex/sample_sheet/sample_sheet_creator.py @@ -91,7 +91,6 @@ def convert_sample_to_header_dict( data_column_names: list[str], ) -> list[str]: """Convert a lims sample object to a list that corresponds to the sample sheet headers.""" - LOG.debug(f"Use sample sheet header 
{data_column_names}") sample_dict = sample.model_dump(by_alias=True) return [str(sample_dict[column]) for column in data_column_names] @@ -106,14 +105,16 @@ def get_data_section_header_and_columns(self) -> list[list[str]] | None: def create_sample_sheet_content(self) -> list[list[str]]: """Create sample sheet content with samples.""" LOG.info("Creating sample sheet content") + complete_data_section: list[list[str]] = self.get_data_section_header_and_columns() sample_sheet_content: list[list[str]] = ( - self.get_additional_sections_sample_sheet() + self.get_data_section_header_and_columns() + self.get_additional_sections_sample_sheet() + complete_data_section ) + LOG.debug(f"Use sample sheet header {complete_data_section[1]}") for sample in self.lims_samples: sample_sheet_content.append( self.convert_sample_to_header_dict( sample=sample, - data_column_names=self.get_data_section_header_and_columns()[1], + data_column_names=complete_data_section[1], ) ) return sample_sheet_content @@ -129,6 +130,7 @@ def process_samples_for_sample_sheet(self) -> None: samples=samples_in_lane, index_cycles=self.run_parameters.index_length, is_reverse_complement=self.is_reverse_complement, + sequencer=self.run_parameters.sequencer, ) self.update_barcode_mismatch_values_for_samples(samples_in_lane) @@ -157,7 +159,7 @@ def update_barcode_mismatch_values_for_samples(self, *args) -> None: def add_override_cycles_to_samples(self) -> None: """Return None for flow cells to be demultiplexed with Bcl2fastq.""" - LOG.debug("No adding of override cycles for Bcl2fastq flow cell") + LOG.debug("Skipping adding of override cycles for Bcl2fastq flow cell") def get_additional_sections_sample_sheet(self) -> list[list[str]]: """Return all sections of the sample sheet that are not the data section.""" @@ -201,19 +203,22 @@ def update_barcode_mismatch_values_for_samples( def add_override_cycles_to_samples(self) -> None: """Add override cycles attribute to samples.""" - flow_cell_index_len: int = 
self.run_parameters.index_length read1_cycles: str = f"Y{self.run_parameters.get_read_1_cycles()};" read2_cycles: str = f"Y{self.run_parameters.get_read_2_cycles()}" + length_index1: int = self.run_parameters.get_index_1_cycles() + length_index2: int = self.run_parameters.get_index_2_cycles() for sample in self.lims_samples: - index1_cycles: str = f"I{self.run_parameters.get_index_1_cycles()};" - index2_cycles: str = f"I{self.run_parameters.get_index_2_cycles()};" - sample_index_len: int = len(get_index_pair(sample)[0]) - if sample_index_len < flow_cell_index_len: - index1_cycles = f"I{sample_index_len}N{flow_cell_index_len - sample_index_len};" + index1_cycles: str = f"I{length_index1};" + index2_cycles: str = f"I{length_index2};" + sample_index1_len: int = len(get_index_pair(sample)[0]) + sample_index2_len: int = len(get_index_pair(sample)[1]) + if sample_index1_len < length_index1: + index1_cycles = f"I{sample_index1_len}N{length_index1 - sample_index1_len};" + if sample_index2_len < length_index2: index2_cycles = ( - f"I{sample_index_len}N{flow_cell_index_len - sample_index_len};" + f"I{sample_index2_len}N{length_index2 - sample_index2_len};" if self.is_reverse_complement - else f"N{flow_cell_index_len - sample_index_len}I{sample_index_len};" + else f"N{length_index2 - sample_index2_len}I{sample_index2_len};" ) sample.override_cycles = read1_cycles + index1_cycles + index2_cycles + read2_cycles @@ -232,21 +237,21 @@ def get_additional_sections_sample_sheet(self) -> list[list[str]]: SampleSheetBCLConvertSections.Header.INDEX_ORIENTATION_FORWARD.value, ] reads_section: list[list[str]] = [ - [SampleSheetBCLConvertSections.Reads.HEADER.value], + [SampleSheetBCLConvertSections.Reads.HEADER], [ - SampleSheetBCLConvertSections.Reads.READ_CYCLES_1.value, + SampleSheetBCLConvertSections.Reads.READ_CYCLES_1, self.run_parameters.get_read_1_cycles(), ], [ - SampleSheetBCLConvertSections.Reads.READ_CYCLES_2.value, + SampleSheetBCLConvertSections.Reads.READ_CYCLES_2, 
self.run_parameters.get_read_2_cycles(), ], [ - SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_1.value, + SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_1, self.run_parameters.get_index_1_cycles(), ], [ - SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2.value, + SampleSheetBCLConvertSections.Reads.INDEX_CYCLES_2, self.run_parameters.get_index_2_cycles(), ], ] diff --git a/cg/cli/demultiplex/sample_sheet.py b/cg/cli/demultiplex/sample_sheet.py index 70e223b141..79878d55dd 100644 --- a/cg/cli/demultiplex/sample_sheet.py +++ b/cg/cli/demultiplex/sample_sheet.py @@ -7,18 +7,14 @@ from cg.apps.demultiplex.sample_sheet.create import create_sample_sheet from cg.apps.demultiplex.sample_sheet.models import FlowCellSample -from cg.apps.demultiplex.sample_sheet.read_sample_sheet import ( - get_sample_sheet_from_file, -) +from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_sample_sheet_from_file from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims.sample_sheet import get_flow_cell_samples from cg.constants.constants import DRY_RUN, FileFormat from cg.constants.demultiplexing import OPTION_BCL_CONVERTER from cg.exc import FlowCellError, HousekeeperFileMissingError from cg.io.controller import WriteFile, WriteStream -from cg.meta.demultiplex.housekeeper_storage_functions import ( - add_sample_sheet_path_to_housekeeper, -) +from cg.meta.demultiplex.housekeeper_storage_functions import add_sample_sheet_path_to_housekeeper from cg.models.cg_config import CGConfig from cg.models.flow_cell.flow_cell import FlowCellDirectoryData diff --git a/cg/constants/demultiplexing.py b/cg/constants/demultiplexing.py index 1895610283..a0b021e8fa 100644 --- a/cg/constants/demultiplexing.py +++ b/cg/constants/demultiplexing.py @@ -22,10 +22,11 @@ class DemultiplexingDirsAndFiles(StrEnum): DELIVERY: str = "delivery.txt" DEMUX_STARTED: str = "demuxstarted.txt" DEMUX_COMPLETE: str = "demuxcomplete.txt" - Hiseq_X_COPY_COMPLETE: str = "copycomplete.txt" - 
Hiseq_X_TILE_DIR: str = "l1t11" + HISEQ_X_COPY_COMPLETE: str = "copycomplete.txt" + HISEQ_X_TILE_DIR: str = "l1t11" RTACOMPLETE: str = "RTAComplete.txt" - RUN_PARAMETERS: str = "RunParameters.xml" + RUN_PARAMETERS_PASCAL_CASE: str = "RunParameters.xml" + RUN_PARAMETERS_CAMEL_CASE: str = "runParameters.xml" SAMPLE_SHEET_FILE_NAME: str = "SampleSheet.csv" UNALIGNED_DIR_NAME: str = "Unaligned" BCL2FASTQ_TILE_DIR_PATTERN: str = r"l\dt\d{2}" @@ -46,23 +47,31 @@ class RunParametersXMLNodes(StrEnum): # Node names APPLICATION: str = ".Application" + APPLICATION_NAME: str = ".//ApplicationName" APPLICATION_VERSION: str = ".ApplicationVersion" CYCLES: str = "Cycles" + INDEX_1_HISEQ: str = ".//IndexRead1" + INDEX_2_HISEQ: str = ".//IndexRead2" INDEX_1_NOVASEQ_6000: str = "./IndexRead1NumberOfCycles" INDEX_2_NOVASEQ_6000: str = "./IndexRead2NumberOfCycles" INDEX_1_NOVASEQ_X: str = "Index1" INDEX_2_NOVASEQ_X: str = "Index2" INNER_READ: str = ".//Read" INSTRUMENT_TYPE: str = ".InstrumentType" - PLANNED_READS: str = "./PlannedReads" + PLANNED_READS_HISEQ: str = ".//Reads" + PLANNED_READS_NOVASEQ_X: str = "./PlannedReads" + READ_1_HISEQ: str = ".//Read1" + READ_2_HISEQ: str = ".//Read2" READ_1_NOVASEQ_6000: str = "./Read1NumberOfCycles" READ_2_NOVASEQ_6000: str = "./Read2NumberOfCycles" READ_1_NOVASEQ_X: str = "Read1" READ_2_NOVASEQ_X: str = "Read2" READ_NAME: str = "ReadName" REAGENT_KIT_VERSION: str = "./RfidsInfo/SbsConsumableVersion" + SEQUENCER_ID: str = ".//ScannerID" # Node Values + HISEQ_APPLICATION: str = "HiSeq Control Software" NOVASEQ_6000_APPLICATION: str = "NovaSeq Control Software" NOVASEQ_X_INSTRUMENT: str = "NovaSeqXPlus" UNKNOWN_REAGENT_KIT_VERSION: str = "unknown" diff --git a/cg/constants/sequencing.py b/cg/constants/sequencing.py index 352b89f7a1..aa4c65d42b 100644 --- a/cg/constants/sequencing.py +++ b/cg/constants/sequencing.py @@ -13,7 +13,7 @@ class Sequencers(StrEnum): OTHER: str = "other" -sequencer_types = { +SEQUENCER_TYPES = { "D00134": 
Sequencers.HISEQGA, "D00410": Sequencers.HISEQGA, "D00415": Sequencers.HISEQGA, diff --git a/cg/exc.py b/cg/exc.py index 13dee35fb4..dcb8b16920 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -228,3 +228,7 @@ class MetricsQCError(CgError): class MissingMetrics(CgError): """Exception raised when mandatory metrics are missing.""" + + +class XMLError(CgError): + """Exception raised when something is wrong with the content of an XML file.""" diff --git a/cg/io/xml.py b/cg/io/xml.py index 5938002f61..09f8e8239b 100644 --- a/cg/io/xml.py +++ b/cg/io/xml.py @@ -1,19 +1,40 @@ """Module for reading and writing xml files.""" - -import xml.etree.ElementTree as ET +import logging from pathlib import Path +from xml.etree.ElementTree import Element, ElementTree, parse from cg.constants import FileExtensions +from cg.exc import XMLError from cg.io.validate_path import validate_file_suffix +LOG = logging.getLogger(__name__) + -def read_xml(file_path: Path) -> ET.ElementTree: +def read_xml(file_path: Path) -> ElementTree: """Read content in a xml file to an ElementTree.""" validate_file_suffix(path_to_validate=file_path, target_suffix=FileExtensions.XML) - tree = ET.parse(file_path) + tree = parse(file_path) return tree -def write_xml(tree: ET.ElementTree, file_path: Path) -> None: +def write_xml(tree: ElementTree, file_path: Path) -> None: """Write content to a xml file.""" tree.write(file_path, encoding="utf-8", xml_declaration=True) + + +def validate_node_exists(node: Element | None, name: str) -> None: + """Validates if the given node is not None. 
+ Raises: + XMLError: If the node is None + """ + if node is None: + message = f"Could not find node with name {name} in XML tree" + LOG.warning(message) + raise XMLError(message) + + +def get_tree_node(tree: ElementTree, node_name: str) -> Element: + """Return the node of a tree given its name if it exists.""" + xml_node: Element = tree.find(node_name) + validate_node_exists(node=xml_node, name=node_name) + return xml_node diff --git a/cg/models/demultiplex/run_parameters.py b/cg/models/demultiplex/run_parameters.py index 8ca9cdd223..b950f2ba29 100644 --- a/cg/models/demultiplex/run_parameters.py +++ b/cg/models/demultiplex/run_parameters.py @@ -1,12 +1,13 @@ -"""Module for modeling run parameters file parsing.""" +"""Module for parsing sequencing run metadata from RunParameters file.""" import logging +from abc import abstractmethod from pathlib import Path from xml.etree import ElementTree from cg.constants.demultiplexing import RunParametersXMLNodes -from cg.constants.sequencing import Sequencers -from cg.exc import RunParametersError -from cg.io.xml import read_xml +from cg.constants.sequencing import SEQUENCER_TYPES, Sequencers +from cg.exc import RunParametersError, XMLError +from cg.io.xml import get_tree_node, read_xml LOG = logging.getLogger(__name__) @@ -19,61 +20,86 @@ def __init__(self, run_parameters_path: Path): self.tree: ElementTree = read_xml(file_path=run_parameters_path) self.validate_instrument() + def validate_instrument(self) -> None: + """Raise an error if the parent class was instantiated.""" + raise NotImplementedError( + "Parent class instantiated. Instantiate instead RunParametersHiSeq, " + "RunParametersNovaSeq6000 or RunParametersNovaSeqX" + ) + + def _validate_instrument(self, node_name: str, node_value: str): + """Fetches the node from an XML file and compares it with the expected value. 
+ Raises: + RunParametersError if the node does not have the expected value.""" + try: + application: ElementTree.Element | None = get_tree_node( + tree=self.tree, node_name=node_name + ) + except XMLError: + raise RunParametersError( + f"Could not find node {node_name} in the run parameters file. " + "Check that the correct run parameters file is used" + ) + if application.text != node_value: + raise RunParametersError(f"The file parsed does not correspond to {node_value}") + @property def index_length(self) -> int: """Return the length of the indexes if they are equal, raise an error otherwise.""" index_one_length: int = self.get_index_1_cycles() index_two_length: int = self.get_index_2_cycles() - if index_one_length != index_two_length: + if index_one_length != index_two_length and self.sequencer not in [ + Sequencers.HISEQX, + Sequencers.HISEQGA, + ]: raise RunParametersError("Index lengths are not the same!") return index_one_length - @staticmethod - def node_not_found(node: ElementTree.Element | None, name: str) -> None: - """Raise exception if the given node is not found.""" - if node is None: - message = f"Could not determine {name}" - LOG.warning(message) - raise RunParametersError(message) + def get_node_string_value(self, node_name: str) -> str: + """Return the value of the node as a string if its validation passes.""" + return get_tree_node(tree=self.tree, node_name=node_name).text - def validate_instrument(self) -> None: - """Raise an error if the parent class was instantiated.""" - raise NotImplementedError( - "Parent class instantiated. 
Instantiate instead RunParametersNovaSeq6000 or RunParametersNovaSeqX" - ) + def get_node_integer_value(self, node_name: str) -> int: + """Return the value of the node as an integer if its validation passes.""" + return int(self.get_node_string_value(node_name=node_name)) @property + @abstractmethod def control_software_version(self) -> str | None: """Return the control software version if existent.""" - raise NotImplementedError( - "Impossible to retrieve control software version from parent class" - ) + pass @property + @abstractmethod def reagent_kit_version(self) -> str | None: """Return the reagent kit version if existent.""" - raise NotImplementedError("Impossible to retrieve reagent kit version from parent class") + pass @property + @abstractmethod def sequencer(self) -> str | None: """Return the sequencer associated with the current run parameters.""" - raise NotImplementedError("Impossible to retrieve sequencer from parent class") + pass - def get_index_1_cycles(self) -> int: + @abstractmethod + def get_index_1_cycles(self) -> int | None: """Return the number of cycles in the first index read.""" - raise NotImplementedError("Impossible to retrieve index1 cycles from parent class") + pass - def get_index_2_cycles(self) -> int: + @abstractmethod + def get_index_2_cycles(self) -> int | None: """Return the number of cycles in the second index read.""" - raise NotImplementedError("Impossible to retrieve index2 cycles from parent class") + pass - def get_read_1_cycles(self) -> int: + @abstractmethod + def get_read_1_cycles(self) -> int | None: """Return the number of cycles in the first read.""" - raise NotImplementedError("Impossible to retrieve read1 cycles from parent class") + pass - def get_read_2_cycles(self) -> int: + @abstractmethod + def get_read_2_cycles(self) -> int | None: """Return the number of cycles in the second read.""" - raise NotImplementedError("Impossible to retrieve read2 cycles from parent class") + pass def __str__(self): return 
f"RunParameters(path={self.path}, sequencer={self.sequencer})" @@ -88,26 +114,69 @@ def __repr__(self): ) +class RunParametersHiSeq(RunParameters): + """Specific class for parsing run parameters of HiSeq2500 sequencing.""" + + def validate_instrument(self) -> None: + """Validate if a HiSeq file was used to instantiate the class.""" + self._validate_instrument( + node_name=RunParametersXMLNodes.APPLICATION_NAME, + node_value=RunParametersXMLNodes.HISEQ_APPLICATION, + ) + + @property + def control_software_version(self) -> None: + """Return None for run parameters associated with HiSeq sequencing.""" + return + + @property + def reagent_kit_version(self) -> None: + """Return None for run parameters associated with HiSeq sequencing.""" + return + + @property + def sequencer(self) -> str: + """Return the sequencer associated with the current run parameters.""" + node_name: str = RunParametersXMLNodes.SEQUENCER_ID + sequencer: str = self.get_node_string_value(node_name=node_name) + return SEQUENCER_TYPES.get(sequencer) + + def get_index_1_cycles(self) -> int: + """Return the number of cycles in the first index read.""" + node_name: str = RunParametersXMLNodes.INDEX_1_HISEQ + return self.get_node_integer_value(node_name=node_name) + + def get_index_2_cycles(self) -> int: + """Return the number of cycles in the second index read.""" + node_name: str = RunParametersXMLNodes.INDEX_2_HISEQ + return self.get_node_integer_value(node_name=node_name) + + def get_read_1_cycles(self) -> int: + """Return the number of cycles in the first read.""" + node_name: str = RunParametersXMLNodes.READ_1_HISEQ + return self.get_node_integer_value(node_name=node_name) + + def get_read_2_cycles(self) -> int: + """Return the number of cycles in the second read.""" + node_name: str = RunParametersXMLNodes.READ_2_HISEQ + return self.get_node_integer_value(node_name=node_name) + + class RunParametersNovaSeq6000(RunParameters): """Specific class for parsing run parameters of NovaSeq6000 
sequencing.""" def validate_instrument(self) -> None: - """Raise an error if the class was not instantiated with a NovaSeq6000 file.""" - node_name: str = RunParametersXMLNodes.APPLICATION - xml_node: ElementTree.Element | None = self.tree.find(node_name) - self.node_not_found(node=xml_node, name="Instrument") - if xml_node.text != RunParametersXMLNodes.NOVASEQ_6000_APPLICATION: - raise RunParametersError( - "The file parsed does not correspond to a NovaSeq6000 instrument" - ) + """Validate if a NovaSeq6000 file was used to instantiate the class.""" + self._validate_instrument( + node_name=RunParametersXMLNodes.APPLICATION, + node_value=RunParametersXMLNodes.NOVASEQ_6000_APPLICATION, + ) @property def control_software_version(self) -> str: """Return the control software version.""" node_name: str = RunParametersXMLNodes.APPLICATION_VERSION - xml_node: ElementTree.Element | None = self.tree.find(node_name) - self.node_not_found(node=xml_node, name="control software version") - return xml_node.text + return self.get_node_string_value(node_name=node_name) @property def reagent_kit_version(self) -> str: @@ -123,45 +192,38 @@ def reagent_kit_version(self) -> str: @property def sequencer(self) -> str: """Return the sequencer associated with the current run parameters.""" - return Sequencers.NOVASEQ.value - - def get_node_integer_value(self, node_name: str, name: str) -> int: - """Return the value of the node as an integer.""" - xml_node = self.tree.find(node_name) - self.node_not_found(node=xml_node, name=name) - return int(xml_node.text) + return Sequencers.NOVASEQ def get_index_1_cycles(self) -> int: """Return the number of cycles in the first index read.""" - node_name = RunParametersXMLNodes.INDEX_1_NOVASEQ_6000 - return self.get_node_integer_value(node_name=node_name, name="length of index one") + node_name: str = RunParametersXMLNodes.INDEX_1_NOVASEQ_6000 + return self.get_node_integer_value(node_name=node_name) def get_index_2_cycles(self) -> int: """Return the 
number of cycles in the second index read.""" - node_name = RunParametersXMLNodes.INDEX_2_NOVASEQ_6000 - return self.get_node_integer_value(node_name=node_name, name="length of index two") + node_name: str = RunParametersXMLNodes.INDEX_2_NOVASEQ_6000 + return self.get_node_integer_value(node_name=node_name) def get_read_1_cycles(self) -> int: """Return the number of cycles in the first read.""" - node_name = RunParametersXMLNodes.READ_1_NOVASEQ_6000 - return self.get_node_integer_value(node_name=node_name, name="length of reads one") + node_name: str = RunParametersXMLNodes.READ_1_NOVASEQ_6000 + return self.get_node_integer_value(node_name=node_name) def get_read_2_cycles(self) -> int: """Return the number of cycles in the second read.""" - node_name = RunParametersXMLNodes.READ_2_NOVASEQ_6000 - return self.get_node_integer_value(node_name=node_name, name="length of reads two") + node_name: str = RunParametersXMLNodes.READ_2_NOVASEQ_6000 + return self.get_node_integer_value(node_name=node_name) class RunParametersNovaSeqX(RunParameters): """Specific class for parsing run parameters of NovaSeqX sequencing.""" def validate_instrument(self) -> None: - """Raise an error if the class was not instantiated with a NovaSeqX file.""" - node_name: str = RunParametersXMLNodes.INSTRUMENT_TYPE - xml_node: ElementTree.Element | None = self.tree.find(node_name) - self.node_not_found(node=xml_node, name="Instrument") - if xml_node.text != RunParametersXMLNodes.NOVASEQ_X_INSTRUMENT: - raise RunParametersError("The file parsed does not correspond to a NovaSeqX instrument") + """Validate if a NovaSeqX file was used to instantiate the class.""" + self._validate_instrument( + node_name=RunParametersXMLNodes.INSTRUMENT_TYPE, + node_value=RunParametersXMLNodes.NOVASEQ_X_INSTRUMENT, + ) @property def control_software_version(self) -> None: @@ -176,18 +238,19 @@ def reagent_kit_version(self) -> None: @property def sequencer(self) -> str: """Return the sequencer associated with the current 
run parameters.""" - return Sequencers.NOVASEQX.value + return Sequencers.NOVASEQX @property def read_parser(self) -> dict[str, int]: """Return read and index cycle values parsed as a dictionary.""" cycle_mapping: dict[str, int] = {} - planned_reads: ElementTree.Element | None = self.tree.find( - RunParametersXMLNodes.PLANNED_READS + planned_reads_tree: ElementTree.Element = get_tree_node( + tree=self.tree, node_name=RunParametersXMLNodes.PLANNED_READS_NOVASEQ_X + ) + planned_reads: list[ElementTree.Element] = planned_reads_tree.findall( + RunParametersXMLNodes.INNER_READ ) - self.node_not_found(node=planned_reads, name="PlannedReads") - read_elem: ElementTree.Element - for read_elem in planned_reads.findall(RunParametersXMLNodes.INNER_READ): + for read_elem in planned_reads: read_name: str = read_elem.get(RunParametersXMLNodes.READ_NAME) cycles: int = int(read_elem.get(RunParametersXMLNodes.CYCLES)) cycle_mapping[read_name] = cycles diff --git a/cg/models/flow_cell/flow_cell.py b/cg/models/flow_cell/flow_cell.py index daec2fe0e8..fddbbbb3aa 100644 --- a/cg/models/flow_cell/flow_cell.py +++ b/cg/models/flow_cell/flow_cell.py @@ -1,6 +1,7 @@ """Module for modeling flow cells.""" import datetime import logging +import os from pathlib import Path from typing import Type @@ -19,16 +20,23 @@ from cg.constants.bcl_convert_metrics import SAMPLE_SHEET_HEADER from cg.constants.constants import LENGTH_LONG_DATE from cg.constants.demultiplexing import BclConverter, DemultiplexingDirsAndFiles -from cg.constants.sequencing import Sequencers, sequencer_types +from cg.constants.sequencing import Sequencers, SEQUENCER_TYPES from cg.exc import FlowCellError, SampleSheetError from cg.models.demultiplex.run_parameters import ( RunParameters, + RunParametersHiSeq, RunParametersNovaSeq6000, RunParametersNovaSeqX, ) from cg.models.flow_cell.utils import parse_date LOG = logging.getLogger(__name__) +RUN_PARAMETERS_CONSTRUCTOR: dict[str, Type] = { + Sequencers.HISEQGA: RunParametersHiSeq, 
+ Sequencers.HISEQX: RunParametersHiSeq, + Sequencers.NOVASEQ: RunParametersNovaSeq6000, + Sequencers.NOVASEQX: RunParametersNovaSeqX, +} class FlowCellDirectoryData: @@ -45,7 +53,7 @@ def __init__(self, flow_cell_path: Path, bcl_converter: str | None = None): self.id: str = "" self.position: Literal["A", "B"] = "A" self.parse_flow_cell_dir_name() - self.bcl_converter: str | None = self.get_bcl_converter(bcl_converter) + self.bcl_converter: str = self.get_bcl_converter(bcl_converter) self._sample_sheet_path_hk: Path | None = None def parse_flow_cell_dir_name(self): @@ -86,7 +94,7 @@ def sample_sheet_path(self) -> Path: """ Return sample sheet path. """ - return Path(self.path, DemultiplexingDirsAndFiles.SAMPLE_SHEET_FILE_NAME.value) + return Path(self.path, DemultiplexingDirsAndFiles.SAMPLE_SHEET_FILE_NAME) def set_sample_sheet_path_hk(self, hk_path: Path): self._sample_sheet_path_hk = hk_path @@ -98,21 +106,24 @@ def get_sample_sheet_path_hk(self) -> Path: @property def run_parameters_path(self) -> Path: - """Return path to run parameters file.""" - return Path(self.path, DemultiplexingDirsAndFiles.RUN_PARAMETERS) + """Return path to run parameters file if it exists. 
+ Raises: + FlowCellError if the flow cell has no run parameters file.""" + if DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE in os.listdir(self.path): + return Path(self.path, DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE) + elif DemultiplexingDirsAndFiles.RUN_PARAMETERS_CAMEL_CASE in os.listdir(self.path): + return Path(self.path, DemultiplexingDirsAndFiles.RUN_PARAMETERS_CAMEL_CASE) + else: + message: str = f"No run parameters file found in flow cell {self.path}" + LOG.error(message) + raise FlowCellError(message) @property def run_parameters(self) -> RunParameters: """Return run parameters object.""" - if not self.run_parameters_path.exists(): - message = f"Could not find run parameters file {self.run_parameters_path}" - LOG.warning(message) - raise FileNotFoundError(message) if not self._run_parameters: - self._run_parameters = ( - RunParametersNovaSeqX(run_parameters_path=self.run_parameters_path) - if self.sequencer_type == Sequencers.NOVASEQX - else RunParametersNovaSeq6000(run_parameters_path=self.run_parameters_path) + self._run_parameters = RUN_PARAMETERS_CONSTRUCTOR[self.sequencer_type]( + run_parameters_path=self.run_parameters_path ) return self._run_parameters @@ -130,14 +141,14 @@ def sequencer_type( self, ) -> Literal[Sequencers.HISEQX, Sequencers.HISEQGA, Sequencers.NOVASEQ, Sequencers.NOVASEQX]: """Return the sequencer type.""" - return sequencer_types[self.machine_name] + return SEQUENCER_TYPES[self.machine_name] def get_bcl_converter(self, bcl_converter: str) -> str: """ Return the BCL converter to use. Tries to get the BCL converter from the sequencer type if not provided. Note: bcl_converter can be used to override automatic selection. - Reason: Data reproducability. + Reason: Data reproducibility. 
""" return bcl_converter or self.get_bcl_converter_by_sequencer() @@ -163,12 +174,12 @@ def copy_complete_path(self) -> Path: @property def hiseq_x_copy_complete_path(self) -> Path: - """Return copy complete path for Hiseq X.""" - return Path(self.path, DemultiplexingDirsAndFiles.Hiseq_X_COPY_COMPLETE) + """Return copy complete path for HiSeqX.""" + return Path(self.path, DemultiplexingDirsAndFiles.HISEQ_X_COPY_COMPLETE) @property def hiseq_x_delivery_started_path(self) -> Path: - """Return delivery started path for Hiseq X.""" + """Return delivery started path for HiSeqX.""" return Path(self.path, DemultiplexingDirsAndFiles.DELIVERY) @property diff --git a/tests/apps/demultiplex/conftest.py b/tests/apps/demultiplex/conftest.py index 8b61f8dec7..f23774e0ef 100644 --- a/tests/apps/demultiplex/conftest.py +++ b/tests/apps/demultiplex/conftest.py @@ -56,12 +56,12 @@ def valid_index() -> Index: @pytest.fixture def bcl2fastq_sample_sheet_creator( - bcl2fastq_flow_cell: FlowCellDirectoryData, + hiseq_x_flow_cell: FlowCellDirectoryData, lims_novaseq_bcl2fastq_samples: list[FlowCellSampleBcl2Fastq], ) -> SampleSheetCreatorBcl2Fastq: """Returns a sample sheet creator for version 1 sample sheets with bcl2fastq format.""" return SampleSheetCreatorBcl2Fastq( - flow_cell=bcl2fastq_flow_cell, + flow_cell=hiseq_x_flow_cell, lims_samples=lims_novaseq_bcl2fastq_samples, ) diff --git a/tests/apps/demultiplex/test_sample_sheet_creator.py b/tests/apps/demultiplex/test_sample_sheet_creator.py index 2fe2d53ece..469ca96126 100644 --- a/tests/apps/demultiplex/test_sample_sheet_creator.py +++ b/tests/apps/demultiplex/test_sample_sheet_creator.py @@ -43,7 +43,7 @@ def test_bcl_convert_sample_sheet_fails_with_bcl2fastq( def test_construct_bcl2fastq_sheet( - bcl2fastq_sample_sheet_creator: SampleSheetCreator, project_dir: Path + bcl2fastq_sample_sheet_creator: SampleSheetCreatorBcl2Fastq, project_dir: Path ): """Test that a created Bcl2fastq sample sheet has samples.""" # GIVEN a Bcl2fastq 
sample sheet creator populated with Bcl2fastq samples @@ -82,12 +82,12 @@ def test_construct_bcl_convert_sheet( def test_remove_unwanted_samples_dual_index( novaseq6000_flow_cell_sample_before_adapt_indexes: FlowCellSampleBcl2Fastq, - bcl2fastq_flow_cell: FlowCellDirectoryData, + hiseq_x_flow_cell: FlowCellDirectoryData, ): """Test that a sample with dual index is not removed.""" # GIVEN a sample sheet creator with a sample with dual index sample_sheet_creator: SampleSheetCreatorBcl2Fastq = SampleSheetCreatorBcl2Fastq( - flow_cell=bcl2fastq_flow_cell, + flow_cell=hiseq_x_flow_cell, lims_samples=[novaseq6000_flow_cell_sample_before_adapt_indexes], ) @@ -100,13 +100,13 @@ def test_remove_unwanted_samples_dual_index( def test_remove_unwanted_samples_no_dual_index( novaseq6000_flow_cell_sample_no_dual_index: FlowCellSampleBcl2Fastq, - bcl2fastq_flow_cell: FlowCellDirectoryData, + novaseq_6000_flow_cell: FlowCellDirectoryData, caplog, ): """Test that samples with no dual index are removed.""" # GIVEN a sample sheet creator with a sample without dual indexes sample_sheet_creator: SampleSheetCreatorBcl2Fastq = SampleSheetCreatorBcl2Fastq( - flow_cell=bcl2fastq_flow_cell, + flow_cell=novaseq_6000_flow_cell, lims_samples=[novaseq6000_flow_cell_sample_no_dual_index], ) diff --git a/tests/cli/demultiplex/test_create_sample_sheet.py b/tests/cli/demultiplex/test_create_sample_sheet.py index 055fb4ba84..e912bfb5fd 100644 --- a/tests/cli/demultiplex/test_create_sample_sheet.py +++ b/tests/cli/demultiplex/test_create_sample_sheet.py @@ -28,7 +28,6 @@ def test_create_sample_sheet_no_run_parameters_fails( flow_cell: FlowCellDirectoryData = FlowCellDirectoryData( flow_cell_path=tmp_flow_cells_directory_no_run_parameters ) - assert not flow_cell.run_parameters_path.exists() # GIVEN flow cell samples mocker.patch( @@ -50,7 +49,7 @@ def test_create_sample_sheet_no_run_parameters_fails( assert result.exit_code != EXIT_SUCCESS # THEN the correct information is communicated - assert "Could 
not find run parameters file" in caplog.text + assert "No run parameters file found in flow cell" in caplog.text def test_create_bcl2fastq_sample_sheet( diff --git a/tests/conftest.py b/tests/conftest.py index a1c0c01a7b..5b3f98c91d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,7 @@ from cg.models import CompressionData from cg.models.cg_config import CGConfig, PDCArchivingDirectory from cg.models.demultiplex.run_parameters import ( + RunParametersHiSeq, RunParametersNovaSeq6000, RunParametersNovaSeqX, ) @@ -849,7 +850,7 @@ def fixture_tmp_flow_cell_name_malformed_sample_sheet() -> str: @pytest.fixture(name="tmp_flow_cell_name_no_sample_sheet") def tmp_flow_cell_name_no_sample_sheet() -> str: - """This is the name of a flow cell directory with the run parameters and sample sheet missing.""" + """Return the name of a flow cell directory with the run parameters and sample sheet missing.""" return "170407_A00689_0209_BHHKVCALXX" @@ -859,7 +860,7 @@ def tmp_flow_cell_name_ready_for_demultiplexing_bcl2fastq() -> str: return "211101_D00483_0615_AHLG5GDRXY" -@pytest.fixture(name="tmp_flow_cells_directory_no_run_parameters") +@pytest.fixture def tmp_flow_cells_directory_no_run_parameters( tmp_flow_cell_name_no_run_parameters: str, tmp_flow_cells_directory: Path ) -> Path: @@ -956,7 +957,7 @@ def tmp_demultiplexed_runs_not_finished_flow_cells_directory( tmp_path: Path, demux_results_not_finished_dir: Path ) -> Path: """ - Return the path to a temporary demultiplex-runs-unfinished that contains unfinished flow cells directory. + Return a temporary demultiplex-runs-unfinished path with an unfinished flow cell directory. Generates a copy of the original demultiplexed-runs-unfinished directory. 
""" original_dir = demux_results_not_finished_dir @@ -1210,12 +1211,24 @@ def flow_cells_demux_all_dir(demultiplex_fixtures: Path) -> Path: return Path(demultiplex_fixtures, "flow_cells_demux_all") -@pytest.fixture(name="demux_results_not_finished_dir") +@pytest.fixture(scope="session") def demux_results_not_finished_dir(demultiplex_fixtures: Path) -> Path: - """Return the path to a dir with demultiplexing results where demux has been done but nothing is cleaned.""" + """Return the path to a dir with demultiplexing results where nothing has been cleaned.""" return Path(demultiplex_fixtures, "demultiplexed-runs-unfinished") +@pytest.fixture(scope="session") +def hiseq_x_flow_cell_name() -> str: + """Return the full name of a HiSeq2500 flow cell with only one index.""" + return "160202_ST-E00266_0064_AHKHHGCCXX" + + +@pytest.fixture(scope="session") +def hiseq_2500_flow_cell_name() -> str: + """Return the full name of a HiSeq2500 flow cell with double indexes.""" + return "180504_D00410_0608_BHGYGYBCX2" + + @pytest.fixture(scope="session") def bcl2fastq_flow_cell_full_name() -> str: """Return full flow cell name.""" @@ -1234,12 +1247,24 @@ def novaseq_x_flow_cell_full_name() -> str: return "20230508_LH00188_0003_A22522YLT3" -@pytest.fixture +@pytest.fixture(scope="session") def novaseq_x_manifest_file(novaseq_x_flow_cell_dir: Path) -> Path: """Return the path to a NovaSeqX manifest file.""" return Path(novaseq_x_flow_cell_dir, "Manifest.tsv") +@pytest.fixture(scope="session") +def hiseq_x_flow_cell_dir(flow_cells_dir: Path, hiseq_x_flow_cell_name: str) -> Path: + """Return the path to a HiSeqX flow cell.""" + return Path(flow_cells_dir, hiseq_x_flow_cell_name) + + +@pytest.fixture(scope="session") +def hiseq_2500_flow_cell_dir(flow_cells_dir: Path, hiseq_2500_flow_cell_name: str) -> Path: + """Return the path to a HiSeq2500 flow cell.""" + return Path(flow_cells_dir, hiseq_2500_flow_cell_name) + + @pytest.fixture(scope="session") def 
bcl2fastq_flow_cell_dir(flow_cells_dir: Path, bcl2fastq_flow_cell_full_name: str) -> Path: """Return the path to the bcl2fastq flow cell demultiplex fixture directory.""" @@ -1276,28 +1301,61 @@ def run_parameters_missing_versions_path(run_parameters_dir: Path) -> Path: return Path(run_parameters_dir, "RunParameters_novaseq_no_software_nor_reagent_version.xml") +@pytest.fixture(scope="session") +def run_parameters_wrong_instrument(run_parameters_dir: Path) -> Path: + """Return a NovaSeqX run parameters file path with a wrong instrument value.""" + return Path(run_parameters_dir, "RunParameters_novaseq_X_wrong_instrument.xml") + + +@pytest.fixture(scope="session") +def hiseq_x_single_index_run_parameters_path( + hiseq_x_flow_cell_dir: Path, +) -> Path: + """Return the path to a HiSeqX run parameters file with single index.""" + return Path(hiseq_x_flow_cell_dir, DemultiplexingDirsAndFiles.RUN_PARAMETERS_CAMEL_CASE) + + +@pytest.fixture(scope="session") +def hiseq_2500_double_index_run_parameters_path( + hiseq_2500_flow_cell_dir: Path, +) -> Path: + """Return the path to a HiSeq2500 run parameters file with double index.""" + return Path(hiseq_2500_flow_cell_dir, DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE) + + @pytest.fixture(scope="session") def novaseq_6000_run_parameters_path(bcl2fastq_flow_cell_dir: Path) -> Path: - """Return the path to a file with NovaSeq6000 run parameters.""" - return Path(bcl2fastq_flow_cell_dir, "RunParameters.xml") + """Return the path to a NovaSeq6000 run parameters file.""" + return Path(bcl2fastq_flow_cell_dir, DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE) @pytest.fixture(scope="session") def novaseq_x_run_parameters_path(novaseq_x_flow_cell_dir: Path) -> Path: - """Return the path to a file with NovaSeqX run parameters.""" - return Path(novaseq_x_flow_cell_dir, "RunParameters.xml") + """Return the path to a NovaSeqX run parameters file.""" + return Path(novaseq_x_flow_cell_dir, 
DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE) -@pytest.fixture(scope="module") -def run_parameters_novaseq_6000_different_index_path(run_parameters_dir: Path) -> Path: - """Return the path to a NovaSeq6000 run parameters file with different index cycles.""" - return Path(run_parameters_dir, "RunParameters_novaseq_6000_different_index_cycles.xml") +@pytest.fixture(scope="function") +def run_parameters_hiseq_different_index(run_parameters_dir: Path) -> RunParametersHiSeq: + """Return a HiSeq RunParameters object with different index cycles.""" + path = Path(run_parameters_dir, "RunParameters_hiseq_2500_different_index_cycles.xml") + return RunParametersHiSeq(run_parameters_path=path) -@pytest.fixture(scope="module") -def run_parameters_novaseq_x_different_index_path(run_parameters_dir: Path) -> Path: - """Return the path to a NovaSeqX run parameters file with different index cycles.""" - return Path(run_parameters_dir, "RunParameters_novaseq_X_different_index_cycles.xml") +@pytest.fixture(scope="function") +def run_parameters_novaseq_6000_different_index( + run_parameters_dir: Path, +) -> RunParametersNovaSeq6000: + """Return a NovaSeq6000 RunParameters object with different index cycles.""" + path = Path(run_parameters_dir, "RunParameters_novaseq_6000_different_index_cycles.xml") + return RunParametersNovaSeq6000(run_parameters_path=path) + + +@pytest.fixture(scope="function") +def run_parameters_novaseq_x_different_index(run_parameters_dir: Path) -> RunParametersNovaSeqX: + """Return a NovaSeqX RunParameters object with different index cycles.""" + path = Path(run_parameters_dir, "RunParameters_novaseq_X_different_index_cycles.xml") + return RunParametersNovaSeqX(run_parameters_path=path) @pytest.fixture(scope="module") @@ -1308,6 +1366,22 @@ def run_parameters_missing_versions( return RunParametersNovaSeq6000(run_parameters_path=run_parameters_missing_versions_path) +@pytest.fixture(scope="session") +def hiseq_2500_run_parameters_double_index( + 
hiseq_2500_double_index_run_parameters_path: Path, +) -> RunParametersHiSeq: + """Return a HiSeq2500 run parameters object with double index.""" + return RunParametersHiSeq(run_parameters_path=hiseq_2500_double_index_run_parameters_path) + + +@pytest.fixture(scope="session") +def hiseq_x_run_parameters_single_index( + hiseq_x_single_index_run_parameters_path: Path, +) -> RunParametersHiSeq: + """Return a HiSeqX run parameters object with single index.""" + return RunParametersHiSeq(run_parameters_path=hiseq_x_single_index_run_parameters_path) + + @pytest.fixture(scope="session") def novaseq_6000_run_parameters( novaseq_6000_run_parameters_path: Path, @@ -1324,6 +1398,18 @@ def novaseq_x_run_parameters( return RunParametersNovaSeqX(run_parameters_path=novaseq_x_run_parameters_path) +@pytest.fixture(scope="module") +def hiseq_2500_flow_cell(hiseq_2500_flow_cell_dir: Path) -> FlowCellDirectoryData: + """Return a HiSeq2500 flow cell.""" + return FlowCellDirectoryData(flow_cell_path=hiseq_2500_flow_cell_dir) + + +@pytest.fixture(scope="module") +def hiseq_x_flow_cell(hiseq_x_flow_cell_dir: Path) -> FlowCellDirectoryData: + """Return a HiSeqX flow cell.""" + return FlowCellDirectoryData(flow_cell_path=hiseq_x_flow_cell_dir) + + @pytest.fixture(scope="session") def bcl2fastq_flow_cell(bcl2fastq_flow_cell_dir: Path) -> FlowCellDirectoryData: """Create a flow cell object with flow cell that is demultiplexed.""" @@ -1336,13 +1422,13 @@ def bcl2fastq_flow_cell(bcl2fastq_flow_cell_dir: Path) -> FlowCellDirectoryData: def novaseq_flow_cell_demultiplexed_with_bcl2fastq( bcl_convert_flow_cell_dir: Path, ) -> FlowCellDirectoryData: - """Create a Novaseq6000 flow cell object with flow cell that is demultiplexed using Bcl2fastq.""" + """Return a Novaseq6000 flow cell object demultiplexed using Bcl2fastq.""" return FlowCellDirectoryData( flow_cell_path=bcl_convert_flow_cell_dir, bcl_converter=BclConverter.BCL2FASTQ ) -@pytest.fixture(scope="session") +@pytest.fixture(scope="module") def 
bcl_convert_flow_cell(bcl_convert_flow_cell_dir: Path) -> FlowCellDirectoryData: """Create a bcl_convert flow cell object with flow cell that is demultiplexed.""" return FlowCellDirectoryData( @@ -1350,6 +1436,12 @@ def bcl_convert_flow_cell(bcl_convert_flow_cell_dir: Path) -> FlowCellDirectoryD ) +@pytest.fixture(scope="function") +def novaseq_6000_flow_cell(bcl_convert_flow_cell: FlowCellDirectoryData) -> FlowCellDirectoryData: + """Return a NovaSeq6000 flow cell object.""" + return bcl_convert_flow_cell + + @pytest.fixture(scope="function") def novaseq_x_flow_cell(novaseq_x_flow_cell_dir: Path) -> FlowCellDirectoryData: """Create a NovaSeqX flow cell object with flow cell that is demultiplexed.""" @@ -1364,7 +1456,7 @@ def bcl2fastq_flow_cell_id(bcl2fastq_flow_cell: FlowCellDirectoryData) -> str: return bcl2fastq_flow_cell.id -@pytest.fixture(scope="session") +@pytest.fixture(scope="module") def bcl_convert_flow_cell_id(bcl_convert_flow_cell: FlowCellDirectoryData) -> str: """Return flow cell id from bcl_convert flow cell object.""" return bcl_convert_flow_cell.id @@ -1378,8 +1470,8 @@ def demultiplexing_delivery_file(bcl2fastq_flow_cell: FlowCellDirectoryData) -> @pytest.fixture(name="hiseq_x_tile_dir") def hiseq_x_tile_dir(bcl2fastq_flow_cell: FlowCellDirectoryData) -> Path: - """Return Hiseq X tile dir.""" - return Path(bcl2fastq_flow_cell.path, DemultiplexingDirsAndFiles.Hiseq_X_TILE_DIR) + """Return HiSeqX tile dir.""" + return Path(bcl2fastq_flow_cell.path, DemultiplexingDirsAndFiles.HISEQ_X_TILE_DIR) @pytest.fixture(name="lims_novaseq_samples_file") diff --git a/tests/fixtures/apps/demultiplexing/SampleSheet.csv b/tests/fixtures/apps/demultiplexing/SampleSheet.csv deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/SampleSheet.csv b/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/SampleSheet.csv new file mode 100644 index 
0000000000..f25fae2e17 --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/SampleSheet.csv @@ -0,0 +1,10 @@ +[Data] +FCID,Lane,SampleID,SampleRef,index,SampleName,Control,Recipe,Operator,Project +HKHHGCCXX,1,AA123,hg19,TCCGGAGA,,N,R1,NN,1 +HKHHGCCXX,2,AA124,hg19,CGCTCATT,,N,R1,NN,1 +HKHHGCCXX,3,AA125,hg19,GAGATTCC,,N,R1,NN,1 +HKHHGCCXX,4,AA126,hg19,GAATTCGT,,N,R1,NN,2 +HKHHGCCXX,5,AA127,hg19,CGGCTATG,,N,R1,NN,3 +HKHHGCCXX,6,AA128,hg19,TCCGCGAA,,N,R1,NN,4 +HKHHGCCXX,7,AA129,hg19,TCTCGCGC,,N,R1,NN,5 +HKHHGCCXX,8,AA130,hg19,AGCGATAG,,N,R1,NN,6 diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/runParameters.xml b/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/runParameters.xml new file mode 100644 index 0000000000..12f6f42597 --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/flow_cells/160202_ST-E00266_0064_AHKHHGCCXX/runParameters.xml @@ -0,0 +1,125 @@ + + + + false + 2 + HKHHGCCXX + -999 + A + SINGLEINDEX + 151 + 8 + 0 + 151 + X:\Runs + Save All Thumbnails + HiSeq X + true + + HiSeq X SBS + HiSeq X PE + HiSeq X Sequencing Primer + None + false + HiSeq Control Software + 3.3.39 + 160202_ST-E00266_0064_AHKHHGCCXX + 160202 + + clinical.genomics@scilifelab.se + 17177178 + true + true + true + + ST-E00266 + 64 + HWI-E00266 + 10.37.13 + 3.0.0 + 2.7.1 + 2.5.2.28 + Illumina,Bruno Fluidics Controller,0,v2.0420 + 2.21-C00-R03 + 7.5.180.4395 + 2.12.0.0 + + HKHHGCCXX + H1234CCXX + sbsuser + +
+
+
+
+
+
+
+
+ + DynamicITF + BothLaneSurfaces + AutoSwath + true + true + true + true + false + + 3 + 7 + 0 + 250 + 250 + 0 + 200 + 21 + 1 + 100 + 50 + 20 + 65535 + 50 + 4 + + 3200 + 7241 + 3200 + 174420 + 60.175 + 24 + 2 + false + + + + + + + + + x + false + 310 + false + false + true + + + + + x + + + + + + + 3.2.4 + false + false + + false + + 1 + \ No newline at end of file diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/RunParameters.xml b/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/RunParameters.xml new file mode 100644 index 0000000000..d8781c6da7 --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/RunParameters.xml @@ -0,0 +1,136 @@ + + + + HGYGYBCX2_ubiella_metvirus_RNA_9pM + -999 + B + DUALINDEX + true + 101 + 8 + 8 + 101 + X:\Runs + true + Save Tile Thumbnails + HiSeq Rapid Flow Cell v2 + false + false + HiSeq Rapid SBS Kit v2 + HiSeq Rapid PE Cluster Kit v2 + HiSeq Rapid PE Cluster Kit v2 + + 1 + 2 + + OnBoardClustering + RapidRunV2 + RapidRun + None + false + false + HiSeq Control Software + 2.2.58 + 180504_D00410_0608_BHGYGYBCX2 + 180504 + BaseSpace + + 84966884 + true + true + false + + D00410 + 608 + HWI-D00410 + 7.9.7 + 3.0.0 + 1.18.64 + Illumina,Bruno Fluidics Controller,0,v2.0340 + 2.01-F20-R02 + 6.45.20.3690 + + HGYGYBCX2 + sbsuser + +
+
+ + DynamicITF + BothLaneSurfaces + DualSwathFC + true + true + true + true + true + false + HiSeq Adapter Plate + HiSeq Flow Cell Holder + 5 + 8 + + 3 + 7 + 0 + 500 + 500 + 250 + 350 + 10 + 200 + 100 + 50 + 0 + 65535 + 100 + 4 + + 2048 + 10000 + 2048 + 160000 + 1.40625E-07 + 60 + 16 + 2 + false + + + + + + + false + + + + x + false + 225 + false + true + false + + + + + x + + + + + + + + + false + 0 + true + 1.5.21.0 + false + false + + + 1 + \ No newline at end of file diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/SampleSheet.csv b/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/SampleSheet.csv new file mode 100644 index 0000000000..208e693edf --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/flow_cells/180504_D00410_0608_BHGYGYBCX2/SampleSheet.csv @@ -0,0 +1,37 @@ +FCID,Lane,SampleID,SampleRef,index,SampleName,Control,Recipe,Operator,Project +HGYGYBCX2,1,MICJB77A3,hg19,ACGCTCGA,MICJB77A3,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A4,hg19,AGATCGCA,MICJB77A4,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A5,hg19,ATCCTGTA,MICJB77A5,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A9,hg19,GTACGCAA,MICJB77A9,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A10,hg19,TATCAGCA,MICJB77A10,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A1,hg19,GAGTTAGT,MICJB77A1,N,R1,script,374275 +HGYGYBCX2,1,MICJB77A2,hg19,AAGGTACA,MICJB77A2,N,R1,script,374275 +HGYGYBCX2,1,MICJB71A1,hg19,AGGATGGT,MICJB71A1,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A9,hg19,ATAGCGGT,MICJB71A9,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A2,hg19,GCACAACT,MICJB71A2,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A10,hg19,CGCATGAT,MICJB71A10,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A3,hg19,CTGTTAGG,MICJB71A3,N,R1,script,886022 +HGYGYBCX2,1,MICJB75A15,hg19,GGTATAGG,MICJB75A15,N,R1,script,microbial +HGYGYBCX2,1,MICJB71A4,hg19,CGATAGAG,MICJB71A4,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A5,hg19,ATGGTTGC,MICJB71A5,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A6,hg19,AGGTGTAC,MICJB71A6,N,R1,script,886022 
+HGYGYBCX2,1,MICJB71A7,hg19,CGGATTGA,MICJB71A7,N,R1,script,886022 +HGYGYBCX2,1,MICJB71A8,hg19,AGGAGGAA,MICJB71A8,N,R1,script,886022 +HGYGYBCX2,2,MICJB77A3,hg19,ACGCTCGA,MICJB77A3,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A4,hg19,AGATCGCA,MICJB77A4,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A5,hg19,ATCCTGTA,MICJB77A5,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A9,hg19,GTACGCAA,MICJB77A9,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A10,hg19,TATCAGCA,MICJB77A10,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A1,hg19,GAGTTAGT,MICJB77A1,N,R1,script,374275 +HGYGYBCX2,2,MICJB77A2,hg19,AAGGTACA,MICJB77A2,N,R1,script,374275 +HGYGYBCX2,2,MICJB71A1,hg19,AGGATGGT,MICJB71A1,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A9,hg19,ATAGCGGT,MICJB71A9,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A2,hg19,GCACAACT,MICJB71A2,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A10,hg19,CGCATGAT,MICJB71A10,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A3,hg19,CTGTTAGG,MICJB71A3,N,R1,script,886022 +HGYGYBCX2,2,MICJB75A15,hg19,GGTATAGG,MICJB75A15,N,R1,script,microbial +HGYGYBCX2,2,MICJB71A4,hg19,CGATAGAG,MICJB71A4,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A5,hg19,ATGGTTGC,MICJB71A5,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A6,hg19,AGGTGTAC,MICJB71A6,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A7,hg19,CGGATTGA,MICJB71A7,N,R1,script,886022 +HGYGYBCX2,2,MICJB71A8,hg19,AGGAGGAA,MICJB71A8,N,R1,script,886022 diff --git a/tests/fixtures/apps/demultiplexing/flow_cells/SampleSheet.csv b/tests/fixtures/apps/demultiplexing/flow_cells/SampleSheet.csv deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_hiseq_2500_different_index_cycles.xml b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_hiseq_2500_different_index_cycles.xml new file mode 100644 index 0000000000..ef8b2fd4ad --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_hiseq_2500_different_index_cycles.xml @@ -0,0 +1,137 @@ + + + + HGYGYBCX2_ubiella_metvirus_RNA_9pM + -999 + B + DUALINDEX + 
true + 101 + 8 + 10 + 101 + X:\Runs + true + v8RRv2bins + Save Tile Thumbnails + HiSeq Rapid Flow Cell v2 + false + false + HiSeq Rapid SBS Kit v2 + HiSeq Rapid PE Cluster Kit v2 + HiSeq Rapid PE Cluster Kit v2 + + 1 + 2 + + OnBoardClustering + RapidRunV2 + RapidRun + None + false + false + HiSeq Control Software + 2.2.58 + 180504_D00410_0608_BHGYGYBCX2 + 180504 + BaseSpace + + 84966884 + true + true + false + + D00410 + 608 + HWI-D00410 + 7.9.7 + 3.0.0 + 1.18.64 + Illumina,Bruno Fluidics Controller,0,v2.0340 + 2.01-F20-R02 + 6.45.20.3690 + + HGYGYBCX2 + sbsuser + +
+
+ + DynamicITF + BothLaneSurfaces + DualSwathFC + true + true + true + true + true + false + HiSeq Adapter Plate + HiSeq Flow Cell Holder + 5 + 8 + + 3 + 7 + 0 + 500 + 500 + 250 + 350 + 10 + 200 + 100 + 50 + 0 + 65535 + 100 + 4 + + 2048 + 10000 + 2048 + 160000 + 1.40625E-07 + 60 + 16 + 2 + false + + + + + + + false + + + + x + false + 225 + false + true + false + + + + + x + + + + + + + + + false + 0 + true + 1.5.21.0 + false + false + + + 1 + \ No newline at end of file diff --git a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_6000_different_index_cycles.xml b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_6000_different_index_cycles.xml index acfe13eff1..91dfe305a7 100644 --- a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_6000_different_index_cycles.xml +++ b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_6000_different_index_cycles.xml @@ -62,16 +62,8 @@ 02/06/2021 12:40:00 3 - C:\Program Files\Illumina\NovaSeq Control Software\Recipe false - /ilmn/outputfolder/210206_A00689_0226_AHMYJ7DSXY/ - /ilmn/outputfolder - Z:\outputfolder\210206_A00689_0226_AHMYJ7DSXY\ - \\172.16.1.6\clinicaldata\Runs\210206_A00689_0226_AHMYJ7DSXY\ - \\172.16.1.6\clinicaldata\Runs - C:\ProgramData\Illumina\NovaSeq\NovaSeqTemp\210206_A00689_0226_AHMYJ7DSXY\ - C:\ProgramData\Illumina\NovaSeq\NovaSeqTemp\RunSetupLogs\A00689_2021-02-06__11_58_48_SideA 210206 210206_A00689_0226_AHMYJ7DSXY true diff --git a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_different_index_cycles.xml b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_different_index_cycles.xml index 84721114cc..a2b70c9b06 100755 --- a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_different_index_cycles.xml +++ b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_different_index_cycles.xml @@ -3,7 +3,6 @@ A control-software 
1.0.1.7385 - //ngi-nas.scilifelab.se/ngi_data/Runs/20230508_LH00188_0003_A22522YLT3 InstrumentPerformance NovaSeqXPlus LH00188 diff --git a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_wrong_instrument.xml b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_wrong_instrument.xml new file mode 100755 index 0000000000..5dcf32ecb4 --- /dev/null +++ b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_X_wrong_instrument.xml @@ -0,0 +1,65 @@ + + + A + control-software + 1.0.1.7385 + InstrumentPerformance + Other + LH00188 + 20230508_LH00188_0003_A22522YLT3 + 3 + 10B Sequencing + 10B-01.00.00 + 22522YLT3 + NovaSeqXSeries B3 + NovaSeqXSeriesB3 + 1.2345678 + + + 22522YLT3 + 20737128 + 20080370 + 2024-02-28T00:00:00+01:00 + FlowCell + 10B + + + LC4001354-LC3 + 20707596 + 20066614 + 2023-12-14T00:00:00+01:00 + Reagent + 10B + + + LC2212060272-1 + 22120701 + 20089853 + 2023-06-02T00:00:00+02:00 + Buffer + 10B + + + LC1006247-LC1 + 1000015508 + 20072271 + 2024-03-25T00:00:00+01:00 + SampleTube + 10B + + + LC2000247-LC1 + 17714646 + 20081650 + 2023-11-08T00:00:00+01:00 + Lyo + 10B + + + + + + + + + \ No newline at end of file diff --git a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_no_software_nor_reagent_version.xml b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_no_software_nor_reagent_version.xml index 668363d89b..67ce1720d2 100644 --- a/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_no_software_nor_reagent_version.xml +++ b/tests/fixtures/apps/demultiplexing/run_parameters/RunParameters_novaseq_no_software_nor_reagent_version.xml @@ -61,16 +61,7 @@ 09/06/2022 10:10:00 4 - C:\Program Files\Illumina\NovaSeq Control Software\Recipe - false - /ilmn/outputfolder/220906_A00689_0636_AH7CJFDRX2/ - /ilmn/outputfolder - \\fddc-65e5-66fa--1.ipv6-literal.net\ilmn\outputfolder\220906_A00689_0636_AH7CJFDRX2\ - 
\\172.16.1.6\clinicaldata\Runs\220906_A00689_0636_AH7CJFDRX2\ - \\172.16.1.6\clinicaldata\Runs - C:\ProgramData\Illumina\NovaSeq\NovaSeqTemp\220906_A00689_0636_AH7CJFDRX2\ - C:\ProgramData\Illumina\NovaSeq\NovaSeqTemp\RunSetupLogs\A00689_2022-09-06__09_36_50_SideA 220906 220906_A00689_0636_AH7CJFDRX2 true @@ -132,7 +123,6 @@ true - 196DDD1C160F20EA 1.6.3.1575 Manual NovaSeqStandard diff --git a/tests/meta/demultiplex/conftest.py b/tests/meta/demultiplex/conftest.py index 6c13aad965..83187c1fa6 100644 --- a/tests/meta/demultiplex/conftest.py +++ b/tests/meta/demultiplex/conftest.py @@ -86,8 +86,8 @@ def flow_cell_project_id() -> int: @pytest.fixture(name="hiseq_x_copy_complete_file") def hiseq_x_copy_complete_file(bcl2fastq_flow_cell: FlowCellDirectoryData) -> Path: - """Return Hiseq X flow cell copy complete file.""" - return Path(bcl2fastq_flow_cell.path, DemultiplexingDirsAndFiles.Hiseq_X_COPY_COMPLETE) + """Return HiSeqX flow cell copy complete file.""" + return Path(bcl2fastq_flow_cell.path, DemultiplexingDirsAndFiles.HISEQ_X_COPY_COMPLETE) @pytest.fixture(name="populated_flow_cell_store") diff --git a/tests/models/demultiplexing/conftest.py b/tests/models/demultiplexing/conftest.py index 16066de55a..b1f6e09e40 100644 --- a/tests/models/demultiplexing/conftest.py +++ b/tests/models/demultiplexing/conftest.py @@ -3,7 +3,7 @@ import pytest -@pytest.fixture(name="demultiplexed_bcl_convert_flow_cell") +@pytest.fixture def demultiplexed_bcl_convert_flow_cell( demultiplexed_runs: Path, bcl_convert_flow_cell_full_name: str ) -> Path: diff --git a/tests/models/demultiplexing/test_run_parameters.py b/tests/models/demultiplexing/test_run_parameters.py index b7754b74cf..e9d1f05f22 100644 --- a/tests/models/demultiplexing/test_run_parameters.py +++ b/tests/models/demultiplexing/test_run_parameters.py @@ -2,46 +2,84 @@ from pathlib import Path import pytest +from _pytest.fixtures import FixtureRequest from cg.constants.demultiplexing import RunParametersXMLNodes from 
cg.constants.sequencing import Sequencers -from cg.exc import RunParametersError +from cg.exc import RunParametersError, XMLError from cg.models.demultiplex.run_parameters import ( RunParameters, + RunParametersHiSeq, RunParametersNovaSeq6000, RunParametersNovaSeqX, ) +@pytest.mark.parametrize( + "run_parameters_path_fixture", + [ + "hiseq_x_single_index_run_parameters_path", + "hiseq_2500_double_index_run_parameters_path", + "novaseq_6000_run_parameters_path", + "novaseq_x_run_parameters_path", + ], +) def test_run_parameters_parent_class_fails( - novaseq_6000_run_parameters_path: Path, - novaseq_x_run_parameters_path: Path, + run_parameters_path_fixture: str, request: FixtureRequest ): """Test that trying to instantiate the parent RunParameters class raises an error.""" - # GIVEN valid paths for run parameters files + # GIVEN a valid path for a run parameters file + run_parameters_path: Path = request.getfixturevalue(run_parameters_path_fixture) - # WHEN trying to instantiate the parent class with a NovaSeq6000 file + # WHEN trying to instantiate the parent class with a RunParameters path with pytest.raises(NotImplementedError) as exc_info: # THEN an NotImplementedError is raised - RunParameters(run_parameters_path=novaseq_6000_run_parameters_path) - assert ( - str(exc_info.value) - == "Parent class instantiated. 
Please instantiate a child class instead" - ) + RunParameters(run_parameters_path=run_parameters_path) + assert "Parent class instantiated" in str(exc_info) + + +def test_run_parameters_hiseq_x(hiseq_x_single_index_run_parameters_path: Path): + """Tests that creating a HiSeq RunParameters with a HiSeqX path file works.""" + # GIVEN a valid HiSeqX run parameters file path + + # WHEN creating a HiSeqX RunParameters object + run_parameters = RunParametersHiSeq(hiseq_x_single_index_run_parameters_path) + + # THEN the created object is of the correct type + assert isinstance(run_parameters, RunParametersHiSeq) + # THEN the sequencer is HiSeqX + assert run_parameters.sequencer == Sequencers.HISEQX + + +def test_run_parameters_hiseq_2500(hiseq_2500_double_index_run_parameters_path: Path): + """Tests that creating a HiSeq RunParameters with a HiSeq2500 path file works.""" + # GIVEN a valid HiSeq2500 run parameters file path + + # WHEN creating a HiSeq2500 RunParameters object + run_parameters = RunParametersHiSeq(hiseq_2500_double_index_run_parameters_path) + + # THEN the created object is of the correct type + assert isinstance(run_parameters, RunParametersHiSeq) + # THEN the sequencer is HiSeq2500 + assert run_parameters.sequencer == Sequencers.HISEQGA - # WHEN trying to instantiate the parent class with a NovaSeqX file - with pytest.raises(NotImplementedError) as exc_info: - # THEN an NotImplementedError is raised - RunParameters(run_parameters_path=novaseq_x_run_parameters_path) - assert ( - str(exc_info.value) - == "Parent class instantiated. 
Please instantiate a child class instead" - ) + +def test_run_parameters_hiseq_wrong_file(novaseq_6000_run_parameters_path: Path): + """Tests that creating a RunParameters HiSeq object with the wrong file fails.""" + # GIVEN a file path with a run parameters file from an instrument different from HiSeq + + # WHEN trying to create a HiSeq RunParameters object with the file + with pytest.raises(RunParametersError) as exc_info: + # THEN a RunParametersError is raised + RunParametersHiSeq(run_parameters_path=novaseq_6000_run_parameters_path) + assert ( + f"Could not find node {RunParametersXMLNodes.APPLICATION_NAME} in the run parameters file" + in str(exc_info.value) + ) def test_run_parameters_novaseq_6000(novaseq_6000_run_parameters_path: Path): """Tests that creating a RunParameters NovaSeq6000 object with the correct file works.""" - # GIVEN a valid NovaSeq6000 run parameters file path # WHEN creating a NovaSeq6000 RunParameters object @@ -49,7 +87,7 @@ def test_run_parameters_novaseq_6000(novaseq_6000_run_parameters_path: Path): # THEN the created object is of the correct type and has the correct attributes assert isinstance(run_parameters, RunParametersNovaSeq6000) - assert run_parameters.sequencer == Sequencers.NOVASEQ.value + assert run_parameters.sequencer == Sequencers.NOVASEQ def test_run_parameters_novaseq_6000_wrong_file(novaseq_x_run_parameters_path: Path): @@ -58,16 +96,16 @@ def test_run_parameters_novaseq_6000_wrong_file(novaseq_x_run_parameters_path: P # WHEN trying to create a NovaSeq6000 RunParameters object with the file with pytest.raises(RunParametersError) as exc_info: - # THEN an error is raised + # THEN a RunParametersError is raised RunParametersNovaSeq6000(run_parameters_path=novaseq_x_run_parameters_path) - assert ( - str(exc_info.value) == "The file parsed does not correspond to a NovaSeq6000 instrument" - ) + assert ( + str(exc_info.value) + == f"The file parsed does not correspond to {RunParametersXMLNodes.NOVASEQ_6000_APPLICATION}" + ) 
def test_run_parameters_novaseq_x(novaseq_x_run_parameters_path: Path): """Tests that creating a RunParameters NovaSeqX object with the correct file works.""" - # GIVEN a valid NovaSeqX run parameters file path # WHEN creating a NovaSeqX RunParameters object @@ -75,7 +113,7 @@ def test_run_parameters_novaseq_x(novaseq_x_run_parameters_path: Path): # THEN the created object is of the correct type and has the correct attributes assert isinstance(run_parameters, RunParametersNovaSeqX) - assert run_parameters.sequencer == Sequencers.NOVASEQX.value + assert run_parameters.sequencer == Sequencers.NOVASEQX def test_run_parameters_novaseq_x_wrong_file(novaseq_6000_run_parameters_path: Path): @@ -84,12 +122,51 @@ def test_run_parameters_novaseq_x_wrong_file(novaseq_6000_run_parameters_path: P # WHEN trying to create a NovaSeqX RunParameters object with the file with pytest.raises(RunParametersError) as exc_info: - # THEN an error is raised + # THEN a RunParametersError is raised RunParametersNovaSeqX(run_parameters_path=novaseq_6000_run_parameters_path) - assert str(exc_info.value) == "The file parsed does not correspond to a NovaSeqX instrument" + assert ( + f"Could not find node {RunParametersXMLNodes.INSTRUMENT_TYPE} in the run parameters" + in str(exc_info.value) + ) + + +def test_run_parameters_novaseq_x_file_wrong_instrument(run_parameters_wrong_instrument: Path): + """Test that initialising a NovaSeqX with a RunParameters file with a wrong instrument fails.""" + # GIVEN a RunParameters file with a wrong instrument + + # WHEN initialising the NovaSeqX + with pytest.raises(RunParametersError) as exc_info: + # THEN assert that an exception was raised since the control software version was not found + RunParametersNovaSeqX(run_parameters_path=run_parameters_wrong_instrument) + + assert ( + str(exc_info.value) + == f"The file parsed does not correspond to {RunParametersXMLNodes.NOVASEQ_X_INSTRUMENT}" + ) -def test_reagent_kit_version(novaseq_6000_run_parameters: 
RunParametersNovaSeq6000): +@pytest.mark.parametrize( + "run_parameters_fixture", + [ + "hiseq_2500_run_parameters_double_index", + "hiseq_x_run_parameters_single_index", + "novaseq_x_run_parameters", + ], +) +def test_reagent_kit_version_hiseq_and_novaseq_x( + run_parameters_fixture: str, request: FixtureRequest +): + """Test that getting reagent kit version from a HiSeq or NovaSeqX RunParameters returns None.""" + # GIVEN a valid RunParameters object + run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture) + + # WHEN fetching the reagent kit version + + # THEN the reagent kit version is None + assert not run_parameters.reagent_kit_version + + +def test_reagent_kit_version_novaseq_6000(novaseq_6000_run_parameters: RunParametersNovaSeq6000): """Test that getting reagent kit version from a correct file returns an expected value.""" # GIVEN a valid RunParameters object for NovaSeq6000 @@ -101,7 +178,7 @@ def test_reagent_kit_version(novaseq_6000_run_parameters: RunParametersNovaSeq60 assert reagent_kit_version != RunParametersXMLNodes.UNKNOWN_REAGENT_KIT_VERSION -def test_reagent_kit_version_missing_version( +def test_reagent_kit_version_novaseq_6000_missing_version( run_parameters_missing_versions: RunParametersNovaSeq6000, caplog ): """Test that 'unknown' will be returned if the run parameters file has no reagent kit method.""" @@ -116,17 +193,30 @@ def test_reagent_kit_version_missing_version( assert "Could not determine reagent kit version" in caplog.text -def test_reagent_kit_version_novaseq_x(novaseq_x_run_parameters: RunParametersNovaSeqX): - """Test that getting reagent kit version from a NovaSeqX run parameters returns None.""" - # GIVEN a valid RunParameters object for NovaSeqX +@pytest.mark.parametrize( + "run_parameters_fixture", + [ + "hiseq_2500_run_parameters_double_index", + "hiseq_x_run_parameters_single_index", + "novaseq_x_run_parameters", + ], +) +def test_control_software_version_hiseq_and_novaseq_x( + 
run_parameters_fixture: str, request: FixtureRequest +): + """Test that getting control software version from HiSeq/NovaSeqX RunParameters returns None.""" + # GIVEN a valid RunParameters object + run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture) - # WHEN fetching the reagent kit version + # WHEN fetching the control software version - # THEN the reagent kit version is None - assert not novaseq_x_run_parameters.reagent_kit_version + # THEN the control software version is None + assert not run_parameters.control_software_version -def test_control_software_version(novaseq_6000_run_parameters: RunParametersNovaSeq6000): +def test_control_software_version_novaseq_6000( + novaseq_6000_run_parameters: RunParametersNovaSeq6000, +): """Test that getting control software version from a correct file returns an expected value.""" # GIVEN a valid RunParameters object for NovaSeq6000 @@ -138,113 +228,91 @@ def test_control_software_version(novaseq_6000_run_parameters: RunParametersNova assert control_software_version != "" -def test_control_software_version_no_version(run_parameters_missing_versions: Path, caplog): +def test_control_software_version_novaseq_6000_no_version( + run_parameters_missing_versions: Path, caplog +): """Test that fetching the control software version from a file without that field fails.""" caplog.set_level(logging.INFO) # GIVEN a RunParameters object created from a file without control software version # WHEN fetching the control software version - with pytest.raises(RunParametersError): + with pytest.raises(XMLError): # THEN assert that an exception was raised since the control software version was not found run_parameters_missing_versions.control_software_version - assert "Could not determine control software version" in caplog.text - - -def test_control_software_version_novaseq_x(novaseq_x_run_parameters: RunParametersNovaSeqX): - """Test that getting control software version from a NovaSeqX run parameters returns 
None.""" - # GIVEN a valid RunParameters object for NovaSeqX - - # WHEN fetching the control software version - - # THEN the control software version is None - assert not novaseq_x_run_parameters.control_software_version - - -def test_index_length_novaseq_6000( - novaseq_6000_run_parameters: RunParametersNovaSeq6000, -): - """Test that getting the index length from a NovaSeq6000 run parameters file returns an int.""" - # GIVEN a valid RunParametersNovaSeq6000 object - - # WHEN getting the index length - - # THEN the index length is an int - assert isinstance(novaseq_6000_run_parameters.index_length, int) + assert ( + f"Could not find node with name {RunParametersXMLNodes.APPLICATION_VERSION} in XML tree" + in caplog.text + ) -def test_index_length_novaseq_x( - novaseq_x_run_parameters: RunParametersNovaSeqX, +@pytest.mark.parametrize( + "run_parameters_fixture, expected_index_length", + [ + ("hiseq_2500_run_parameters_double_index", 8), + ("hiseq_x_run_parameters_single_index", 8), + ("novaseq_6000_run_parameters", 10), + ("novaseq_x_run_parameters", 10), + ], +) +def test_index_length( + run_parameters_fixture: str, expected_index_length: int, request: FixtureRequest ): - """Test that getting the index length from a NovaSeqX run parameters file returns an int.""" - # GIVEN a valid RunParametersNovaSeqX object + """Test getting the index length from RunParameters objects return expected values.""" + # GIVEN a valid RunParameters object + run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture) # WHEN getting the index length + real_index_length: int = run_parameters.index_length # THEN the index length is an int - assert isinstance(novaseq_x_run_parameters.index_length, int) + assert isinstance(real_index_length, int) + # THEN the index length is the expected value + assert real_index_length == expected_index_length -def test_index_length_novaseq_6000_different_length( - run_parameters_novaseq_6000_different_index_path: Path, - 
run_parameters_novaseq_x_different_index_path: Path, +@pytest.mark.parametrize( + "run_parameters_fixture", + [ + "run_parameters_novaseq_6000_different_index", + "run_parameters_novaseq_x_different_index", + ], +) +def test_index_length_different_length( + run_parameters_fixture: str, + request: FixtureRequest, ): """Test that getting the index length from a file with different index cycles fails.""" - # GIVEN a RunParameters object created from a file with different index cycles - run_parameters_novaseq_6000 = RunParametersNovaSeq6000( - run_parameters_path=run_parameters_novaseq_6000_different_index_path - ) + # GIVEN a NovaSeq6000 RunParameters object created from a file with different index cycles + run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture) # WHEN fetching index length with pytest.raises(RunParametersError) as exc_info: # THEN assert that an exception was raised since the index cycles are different - run_parameters_novaseq_6000.index_length - assert str(exc_info.value) == "Index lengths are not the same!" - - -def test_index_length_novaseq_x_different_length( - run_parameters_novaseq_x_different_index_path: Path, -): - """Test that getting the index length from a file with different index cycles fails.""" - # GIVEN a RunParameters object created from a file with different index cycles - run_parameters_novaseq_x = RunParametersNovaSeqX( - run_parameters_path=run_parameters_novaseq_x_different_index_path - ) - # WHEN fetching index length - with pytest.raises(RunParametersError) as exc_info: - # THEN assert that an exception was raised since the index cycles are different - run_parameters_novaseq_x.index_length - assert str(exc_info.value) == "Index lengths are not the same!" 
- - -def test_get_cycles_novaseq_6000(novaseq_6000_run_parameters: RunParametersNovaSeq6000): - """Test that the read and index cycles are read correctly for NovaSeqX run parameters.""" - # GIVEN a NovaSeq6000 run parameters object - - # WHEN getting any read cycle - read_cycles: list[int] = [ - novaseq_6000_run_parameters.get_read_1_cycles(), - novaseq_6000_run_parameters.get_read_2_cycles(), - novaseq_6000_run_parameters.get_index_1_cycles(), - novaseq_6000_run_parameters.get_index_2_cycles(), - ] - - # THEN all read cycles are non-negative integers - for cycles in read_cycles: - assert isinstance(cycles, int) - assert cycles >= 0 - - -def test_get_cycles_novaseq_x(novaseq_x_run_parameters: RunParametersNovaSeqX): - """Test that the read and index cycles are read correctly for NovaSeqX run parameters.""" - # GIVEN a NovaSeqX run parameters object + run_parameters.index_length + assert str(exc_info.value) == "Index lengths are not the same!" + + +@pytest.mark.parametrize( + "run_parameters_fixture", + [ + "hiseq_2500_run_parameters_double_index", + "hiseq_x_run_parameters_single_index", + "novaseq_6000_run_parameters", + "novaseq_x_run_parameters", + ], +) +def test_get_cycles(run_parameters_fixture: str, request: FixtureRequest): + """Test that the read and index cycles are read correctly for any RunParameters object.""" + # GIVEN a RunParameters object + run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture) # WHEN getting any read cycle read_cycles: list[int] = [ - novaseq_x_run_parameters.get_read_1_cycles(), - novaseq_x_run_parameters.get_read_2_cycles(), - novaseq_x_run_parameters.get_index_1_cycles(), - novaseq_x_run_parameters.get_index_2_cycles(), + run_parameters.get_read_1_cycles(), + run_parameters.get_read_2_cycles(), + run_parameters.get_index_1_cycles(), + run_parameters.get_index_2_cycles(), ] # THEN all read cycles are non-negative integers diff --git a/tests/models/flow_cell/test_flowcell_model.py 
b/tests/models/flow_cell/test_flowcell_model.py index 6fcf7a988e..216c9f993c 100644 --- a/tests/models/flow_cell/test_flowcell_model.py +++ b/tests/models/flow_cell/test_flowcell_model.py @@ -1,7 +1,10 @@ +import os +import sys from pathlib import Path from typing import Type import pytest +from _pytest.fixtures import FixtureRequest from cg.apps.demultiplex.sample_sheet.models import ( FlowCellSampleBcl2Fastq, @@ -9,10 +12,13 @@ ) from cg.cli.demultiplex.copy_novaseqx_demultiplex_data import get_latest_analysis_path from cg.constants.demultiplexing import BclConverter, DemultiplexingDirsAndFiles +from cg.constants.sequencing import Sequencers +from cg.exc import FlowCellError +from cg.models.demultiplex.run_parameters import RunParameters from cg.models.flow_cell.flow_cell import FlowCellDirectoryData -def test_flowcell_id(bcl2fastq_flow_cell_dir: Path): +def test_flow_cell_id(bcl2fastq_flow_cell_dir: Path): """Test parsing of flow cell id.""" # GIVEN the path to a finished flow cell run # GIVEN the flow cell id @@ -25,7 +31,7 @@ def test_flowcell_id(bcl2fastq_flow_cell_dir: Path): assert flowcell_obj.id == flowcell_id -def test_flowcell_position(bcl2fastq_flow_cell_dir: Path): +def test_flow_cell_position(bcl2fastq_flow_cell_dir: Path): """Test getting flow cell position.""" # GIVEN the path to a finished flow cell # GIVEN a flow cell object @@ -131,19 +137,62 @@ def test_flow_cell_directory_data_with_novaseq_flow_cell_directory( assert flow_cell.bcl_converter == BclConverter.DRAGEN -def test_get_run_parameters_when_non_existing(demultiplexed_runs: Path): - # GIVEN a flowcell object with a directory without run parameters - flowcell_path: Path = Path( - demultiplexed_runs, - "201203_D00483_0200_AHVKJCDRXX", - ) - flow_cell = FlowCellDirectoryData(flow_cell_path=flowcell_path) - assert flow_cell.run_parameters_path.exists() is False +@pytest.mark.parametrize( + "flow_cell_fixture, expected_run_parameters_file_name", + [ + ("hiseq_x_flow_cell", 
DemultiplexingDirsAndFiles.RUN_PARAMETERS_CAMEL_CASE), + ("hiseq_2500_flow_cell", DemultiplexingDirsAndFiles.RUN_PARAMETERS_PASCAL_CASE), + ], +) +def test_run_parameters_path( + flow_cell_fixture: str, expected_run_parameters_file_name: str, request: FixtureRequest +): + """.""" + # GIVEN a flow cell with a run parameters + flow_cell: FlowCellDirectoryData = request.getfixturevalue(flow_cell_fixture) + + # WHEN getting the run parameters file name + run_parameters_path: Path = flow_cell.run_parameters_path + + # THEN it should exist and be the expected one + assert run_parameters_path.exists() + assert run_parameters_path.name == expected_run_parameters_file_name + + +def test_run_parameters_path_when_non_existing(tmp_flow_cells_directory_no_run_parameters: Path): + """Test that getting the path of the run parameters path fails if the file does not exist.""" + # GIVEN a flowcell object with a directory without a run parameters file + flow_cell = FlowCellDirectoryData(flow_cell_path=tmp_flow_cells_directory_no_run_parameters) + + # WHEN fetching the run parameters path + with pytest.raises(FlowCellError) as exc: + # THEN a FlowCellError is raised + flow_cell.run_parameters_path + assert "No run parameters file found in flow cell" in str(exc.value) + + +@pytest.mark.parametrize( + "flow_cell_fixture, expected_sequencer", + [ + ("hiseq_2500_flow_cell", Sequencers.HISEQGA), + ("hiseq_x_flow_cell", Sequencers.HISEQX), + ("novaseq_6000_flow_cell", Sequencers.NOVASEQ), + ("novaseq_x_flow_cell", Sequencers.NOVASEQX), + ], +) +def test_flow_cell_run_parameters_type( + flow_cell_fixture: str, expected_sequencer: str, request: FixtureRequest +): + """Test that the run parameters of the flow cell is of the expected type.""" + # GIVEN a flow cell without _run_parameters + flow_cell: FlowCellDirectoryData = request.getfixturevalue(flow_cell_fixture) + assert not flow_cell._run_parameters + + # WHEN creating the run parameters of the flow cell + run_parameters: RunParameters = 
flow_cell.run_parameters - # WHEN fetching the run parameters object - with pytest.raises(FileNotFoundError): - # THEN assert that a FileNotFound error is raised - flow_cell.run_parameters + # THEN the run parameters sequencer is the same as of the flow cell + assert run_parameters.sequencer == expected_sequencer def test_has_demultiplexing_started_locally_false(tmp_flow_cell_directory_bclconvert: Path):