From f8a6ac8cbd086e06aa12cfb8e979f92532c1dcfc Mon Sep 17 00:00:00 2001
From: Sebastian Diaz <juan.sebastian.diaz.boada@scilifelab.se>
Date: Thu, 17 Oct 2024 09:58:08 +0200
Subject: [PATCH] new post process all (#3845)(minor)

## Description
Closes https://github.com/Clinical-Genomics/add-new-tech/issues/118
Create a command `post-process all` that will post-process all instrument runs regardless of the instrument. Currently only works for Pacbio. The instrument can be added as a flag so that only runs of a given instrument are post-processed.

### Added

- Cli command function `post_process_all_runs` in `cg/cli/post_process/post_process.py`.
- Utils functions and a model to get and filter the runs to post-process in `cg/cli/post_process/utils.py`
- Tests and fixtures for these utils functions
- Abstract service family ` RunNamesService` with the child `PacbioRunNamesService` in charge of generating sequencing run names
- Tests and fixtures for this new service
- New property `run_names_services` in the `CGConfig` class to access the run names service from the config
- New check function `is_run_processed` defined abstractly in the post-processing class and in implemented in the Pacbio child class that checks if a run has been post-processed
---
 cg/cli/post_process/post_process.py           | 38 ++++++++++-
 cg/cli/post_process/utils.py                  | 58 +++++++++++++++++
 cg/models/cg_config.py                        | 40 +++++++-----
 cg/services/run_devices/abstract_classes.py   | 14 ++++-
 .../pacbio_store_service.py                   |  3 +
 .../data_transfer_service.py                  | 11 +++-
 .../pacbio_houskeeper_service.py              |  9 +--
 .../pacbio/metrics_parser/metrics_parser.py   |  7 ++-
 .../pacbio/post_processing_service.py         | 11 +++-
 .../run_validator/pacbio_run_validator.py     | 12 ++--
 cg/services/run_devices/run_names/__init__.py |  0
 cg/services/run_devices/run_names/pacbio.py   | 20 ++++++
 cg/services/run_devices/run_names/service.py  | 12 ++++
 .../validate_file_transfer_service.py         |  4 ++
 cg/store/crud/create.py                       |  2 +
 tests/cli/post_process/test_utils.py          | 35 ++++++++++-
 tests/conftest.py                             |  1 +
 tests/fixture_plugins/device_fixtures.py      | 13 ++--
 .../pacbio_fixtures/context_fixtures.py       |  7 ++-
 .../pacbio_fixtures/name_fixtures.py          | 27 ++++++++
 .../pacbio_fixtures/service_fixtures.py       | 26 +++++++-
 .../unprocessed_runs_fixtures.py              | 35 +++++++++++
 .../1_B01/post_processing_completed           |  0
 .../test_pac_bio_run_validator.py             | 49 +++++++--------
 .../test_pacbio_run_names_service.py          | 63 +++++++++++++++++++
 25 files changed, 422 insertions(+), 75 deletions(-)
 create mode 100644 cg/services/run_devices/run_names/__init__.py
 create mode 100644 cg/services/run_devices/run_names/pacbio.py
 create mode 100644 cg/services/run_devices/run_names/service.py
 create mode 100644 tests/fixture_plugins/pacbio_fixtures/unprocessed_runs_fixtures.py
 create mode 100644 tests/fixtures/devices/pacbio/SMRTcells/r84202_20240913_121403/1_B01/post_processing_completed
 create mode 100644 tests/services/run_names/test_pacbio_run_names_service.py

diff --git a/cg/cli/post_process/post_process.py b/cg/cli/post_process/post_process.py
index 93d43b540a..f0fc1ccd8f 100644
--- a/cg/cli/post_process/post_process.py
+++ b/cg/cli/post_process/post_process.py
@@ -4,7 +4,11 @@
 
 import click
 
-from cg.cli.post_process.utils import get_post_processing_service_from_run_name
+from cg.cli.post_process.utils import (
+    UnprocessedRunInfo,
+    get_post_processing_service_from_run_name,
+    get_unprocessed_runs_info,
+)
 from cg.cli.utils import CLICK_CONTEXT_SETTINGS
 from cg.constants.cli_options import DRY_RUN
 from cg.models.cg_config import CGConfig
@@ -23,7 +27,7 @@ def post_process_group():
 @DRY_RUN
 @click.argument("run-name")
 @click.pass_obj
-def post_process_sequencing_run(context: CGConfig, run_name: str, dry_run: bool):
+def post_process_run(context: CGConfig, run_name: str, dry_run: bool) -> None:
     """Post-process a sequencing run from the PacBio instrument.
 
     run-name is the full name of the sequencing unit of run. For example:
@@ -35,4 +39,32 @@ def post_process_sequencing_run(context: CGConfig, run_name: str, dry_run: bool)
     post_processing_service.post_process(run_name=run_name, dry_run=dry_run)
 
 
-post_process_group.add_command(post_process_sequencing_run)
+@post_process_group.command(name="all")
+@DRY_RUN
+@click.option(
+    "--instrument",
+    type=click.Choice(["pacbio", "all"]),
+    default="all",
+    show_default=True,
+    help="The instrument for which the runs will be post-processed. Choose 'all' to post-process runs from all instruments.",
+    required=False,
+)
+@click.pass_obj
+def post_process_all_runs(context: CGConfig, instrument: str, dry_run: bool) -> None:
+    """Post-process all available runs."""
+    exit_success: bool = True
+    unprocessed_runs: list[UnprocessedRunInfo] = get_unprocessed_runs_info(
+        context=context, instrument=instrument
+    )
+    for run in unprocessed_runs:
+        try:
+            run.post_processing_service.post_process(run_name=run.name, dry_run=dry_run)
+        except Exception as error:
+            LOG.error(f"Could not post-process {run.instrument} run {run.name}: {error}")
+            exit_success = False
+    if not exit_success:
+        raise click.Abort
+
+
+post_process_group.add_command(post_process_run)
+post_process_group.add_command(post_process_all_runs)
diff --git a/cg/cli/post_process/utils.py b/cg/cli/post_process/utils.py
index 7ddddbc4e1..2896fb94f9 100644
--- a/cg/cli/post_process/utils.py
+++ b/cg/cli/post_process/utils.py
@@ -1,13 +1,30 @@
+"""Utility functions for the post-process command."""
+
+import logging
+
+from pydantic import BaseModel, ConfigDict
+
 from cg.exc import CgError
 from cg.models.cg_config import CGConfig
+from cg.services.run_devices.abstract_classes import PostProcessingService
 from cg.services.run_devices.pacbio.post_processing_service import PacBioPostProcessingService
+from cg.services.run_devices.run_names.service import RunNamesService
 from cg.utils.mapping import get_item_by_pattern_in_source
 
+LOG = logging.getLogger(__name__)
+
 PATTERN_TO_DEVICE_MAP: dict[str, str] = {
     r"^r\d+_\d+_\d+/(1|2)_[^/]+$": "pacbio",
 }
 
 
+class UnprocessedRunInfo(BaseModel):
+    name: str
+    post_processing_service: PostProcessingService
+    instrument: str
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
 def get_post_processing_service_from_run_name(
     context: CGConfig, run_name: str
 ) -> PacBioPostProcessingService:
@@ -21,3 +38,44 @@ def get_post_processing_service_from_run_name(
             f"Run name {run_name} does not match with any known sequencing run name pattern"
         ) from error
     return getattr(context.post_processing_services, device)
+
+
+def get_unprocessed_runs_info(context: CGConfig, instrument: str) -> list[UnprocessedRunInfo]:
+    """Return a list of unprocessed runs for a given instrument or for all instruments."""
+    runs: list[UnprocessedRunInfo] = []
+    instruments_to_check: list[str] = _instruments_to_check(instrument)
+    for instrument_name in instruments_to_check:
+        run_names_service: RunNamesService = getattr(context.run_names_services, instrument_name)
+        runs.extend(
+            _get_unprocessed_runs_from_run_names(
+                run_names=run_names_service.get_run_names(),
+                post_processing_service=getattr(context.post_processing_services, instrument_name),
+                instrument_name=instrument_name,
+            )
+        )
+    return runs
+
+
+def _instruments_to_check(instrument: str) -> list[str]:
+    """Return a list of instruments to check for unprocessed runs."""
+    possible_instruments: list[str] = ["pacbio"]  # Add more instruments here
+    return [instrument] if instrument != "all" else possible_instruments
+
+
+def _get_unprocessed_runs_from_run_names(
+    run_names: list[str], post_processing_service: PostProcessingService, instrument_name
+) -> list[UnprocessedRunInfo]:
+    LOG.debug(f"Adding {instrument_name} run names to the post-processing list")
+    runs: list[UnprocessedRunInfo] = []
+    for name in run_names:
+        if post_processing_service.is_run_processed(name):
+            LOG.debug(f"Run {name} has already been post-processed. Skipping")
+            continue
+        runs.append(
+            UnprocessedRunInfo(
+                name=name,
+                post_processing_service=post_processing_service,
+                instrument=instrument_name,
+            )
+        )
+    return runs
diff --git a/cg/models/cg_config.py b/cg/models/cg_config.py
index 6d96aaf27c..911ab9b32c 100644
--- a/cg/models/cg_config.py
+++ b/cg/models/cg_config.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Any
 
-from pydantic import BaseModel, EmailStr, Field, ConfigDict
+from pydantic import BaseModel, ConfigDict, EmailStr, Field
 from typing_extensions import Literal
 
 from cg.apps.coverage import ChanjoAPI
@@ -47,21 +47,14 @@
 from cg.services.run_devices.pacbio.housekeeper_service.pacbio_houskeeper_service import (
     PacBioHousekeeperService,
 )
-from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import (
-    PacBioMetricsParser,
-)
-from cg.services.run_devices.pacbio.post_processing_service import (
-    PacBioPostProcessingService,
-)
+from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import PacBioMetricsParser
+from cg.services.run_devices.pacbio.post_processing_service import PacBioPostProcessingService
 from cg.services.run_devices.pacbio.run_data_generator.pacbio_run_data_generator import (
     PacBioRunDataGenerator,
 )
-from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import (
-    PacBioRunFileManager,
-)
-from cg.services.run_devices.pacbio.run_validator.pacbio_run_validator import (
-    PacBioRunValidator,
-)
+from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager
+from cg.services.run_devices.pacbio.run_validator.pacbio_run_validator import PacBioRunValidator
+from cg.services.run_devices.run_names.pacbio import PacbioRunNamesService
 from cg.services.sequencing_qc_service.sequencing_qc_service import SequencingQCService
 from cg.services.slurm_service.slurm_cli_service import SlurmCLIService
 from cg.services.slurm_service.slurm_service import SlurmService
@@ -360,11 +353,14 @@ class RunInstruments(BaseModel):
     illumina: IlluminaConfig
 
 
+class RunNamesServices(BaseModel):
+    pacbio: PacbioRunNamesService
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
 class PostProcessingServices(BaseModel):
     pacbio: PacBioPostProcessingService
-
-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
 class CGConfig(BaseModel):
@@ -427,6 +423,7 @@ class CGConfig(BaseModel):
     pdc_service_: PdcService | None = None
     post_processing_services_: PostProcessingServices | None = None
     pigz: CommonAppConfig | None = None
+    run_names_services_: RunNamesServices | None = None
     sample_sheet_api_: IlluminaSampleSheetService | None = None
     scout: CommonAppConfig = None
     scout_api_: ScoutAPI = None
@@ -633,6 +630,17 @@ def pdc_service(self) -> PdcService:
             self.pdc_service_ = service
         return service
 
+    @property
+    def run_names_services(self) -> RunNamesServices:
+        services = self.run_names_services_
+        if services is None:
+            LOG.debug("Instantiating run directory names services")
+            services = RunNamesServices(
+                pacbio=PacbioRunNamesService(self.run_instruments.pacbio.data_dir)
+            )
+            self.run_names_services_ = services
+        return services
+
     @property
     def sample_sheet_api(self) -> IlluminaSampleSheetService:
         sample_sheet_api = self.__dict__.get("sample_sheet_api_")
diff --git a/cg/services/run_devices/abstract_classes.py b/cg/services/run_devices/abstract_classes.py
index 07a1f10dc0..086d4b5d44 100644
--- a/cg/services/run_devices/abstract_classes.py
+++ b/cg/services/run_devices/abstract_classes.py
@@ -1,11 +1,14 @@
 """Post-processing service abstract classes."""
 
+import logging
 from abc import ABC, abstractmethod
 from pathlib import Path
 
 from cg.services.run_devices.abstract_models import PostProcessingDTOs, RunData, RunMetrics
 from cg.services.run_devices.constants import POST_PROCESSING_COMPLETED
 
+LOG = logging.getLogger(__name__)
+
 
 class RunDataGenerator(ABC):
     """Abstract class that holds functionality to create a run data object."""
@@ -83,8 +86,15 @@ def post_process(self, run_name: str, dry_run: bool = False):
         """Store sequencing metrics in StatusDB and relevant files in Housekeeper."""
         pass
 
+    @abstractmethod
+    def is_run_processed(self, run_name: str) -> bool:
+        """Check if a run has been post-processed."""
+        pass
+
     @staticmethod
-    def _touch_post_processing_complete(run_data: RunData) -> None:
+    def _touch_post_processing_complete(run_data: RunData, dry_run: bool = False) -> None:
         """Touch the post-processing complete file."""
         processing_complete_file = Path(run_data.full_path, POST_PROCESSING_COMPLETED)
-        processing_complete_file.touch()
+        if not dry_run:
+            processing_complete_file.touch()
+        LOG.debug(f"Post-processing complete for {run_data.full_path}")
diff --git a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py
index 2fe34dcdc5..5efd5fd0a8 100644
--- a/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py
+++ b/cg/services/run_devices/pacbio/data_storage_service/pacbio_store_service.py
@@ -1,3 +1,5 @@
+"""Module for the Pacbio database store service."""
+
 import logging
 from datetime import datetime
 
@@ -85,4 +87,5 @@ def store_post_processing_data(self, run_data: PacBioRunData, dry_run: bool = Fa
                 f"Dry run, no entries will be added to database for SMRT cell {run_data.full_path}."
             )
             return
+        LOG.debug(f"Data stored in statusDB for run {run_data.sequencing_run_name}")
         self.store.commit_to_store()
diff --git a/cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py b/cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py
index 730db2fbf8..6d953df06a 100644
--- a/cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py
+++ b/cg/services/run_devices/pacbio/data_transfer_service/data_transfer_service.py
@@ -1,3 +1,7 @@
+"""Module for the Pacbio data transfer object creation service."""
+
+import logging
+
 from pydantic import ValidationError
 
 from cg.services.run_devices.abstract_classes import PostProcessingDataTransferService
@@ -17,12 +21,12 @@
     get_sequencing_run_dto,
     get_smrt_cell_dto,
 )
-from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import (
-    PacBioMetricsParser,
-)
+from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import PacBioMetricsParser
 from cg.services.run_devices.pacbio.metrics_parser.models import PacBioMetrics
 from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData
 
+LOG = logging.getLogger(__name__)
+
 
 class PacBioDataTransferService(PostProcessingDataTransferService):
     def __init__(self, metrics_service: PacBioMetricsParser):
@@ -41,6 +45,7 @@ def get_post_processing_dtos(self, run_data: PacBioRunData) -> PacBioDTOs:
         sample_sequencing_metrics_dtos: list[PacBioSampleSequencingMetricsDTO] = (
             get_sample_sequencing_metrics_dtos(metrics.samples)
         )
+        LOG.debug("DTOs created")
         return PacBioDTOs(
             run_device=smrt_cell_dto,
             sequencing_run=sequencing_run_dto,
diff --git a/cg/services/run_devices/pacbio/housekeeper_service/pacbio_houskeeper_service.py b/cg/services/run_devices/pacbio/housekeeper_service/pacbio_houskeeper_service.py
index e873ec87bc..6116cc017d 100644
--- a/cg/services/run_devices/pacbio/housekeeper_service/pacbio_houskeeper_service.py
+++ b/cg/services/run_devices/pacbio/housekeeper_service/pacbio_houskeeper_service.py
@@ -20,14 +20,10 @@
     PostProcessingStoreFileError,
 )
 from cg.services.run_devices.pacbio.housekeeper_service.models import PacBioFileData
-from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import (
-    PacBioMetricsParser,
-)
+from cg.services.run_devices.pacbio.metrics_parser.metrics_parser import PacBioMetricsParser
 from cg.services.run_devices.pacbio.metrics_parser.models import PacBioMetrics
 from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData
-from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import (
-    PacBioRunFileManager,
-)
+from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager
 from cg.utils.mapping import get_item_by_pattern_in_source
 
 LOG = logging.getLogger(__name__)
@@ -64,6 +60,7 @@ def store_files_in_housekeeper(self, run_data: PacBioRunData, dry_run: bool = Fa
                 file_path=bundle_info.file_path,
                 tags=bundle_info.tags,
             )
+        LOG.debug(f"Files stored in Housekeeper for run {run_data.sequencing_run_name}")
 
     @staticmethod
     def _get_bundle_type_for_file(file_path: Path) -> str:
diff --git a/cg/services/run_devices/pacbio/metrics_parser/metrics_parser.py b/cg/services/run_devices/pacbio/metrics_parser/metrics_parser.py
index 2352a2f2d0..f5db9c09e0 100644
--- a/cg/services/run_devices/pacbio/metrics_parser/metrics_parser.py
+++ b/cg/services/run_devices/pacbio/metrics_parser/metrics_parser.py
@@ -1,3 +1,6 @@
+"""Module for teh Pacbio sequenicng metrics parsing service."""
+
+import logging
 from pathlib import Path
 
 from pydantic import ValidationError
@@ -26,6 +29,8 @@
 from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData
 from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager
 
+LOG = logging.getLogger(__name__)
+
 
 class PacBioMetricsParser(PostProcessingMetricsParser):
     """Class for parsing PacBio sequencing metrics."""
@@ -59,7 +64,7 @@ def parse_metrics(self, run_data: PacBioRunData) -> PacBioMetrics:
             metrics_files=metrics_files, file_name=PacBioDirsAndFiles.BARCODES_REPORT
         )
         sample_metrics: list[SampleMetrics] = get_parsed_sample_metrics(metrics_files)
-
+        LOG.debug(f"All metrics parsed for run {run_data.sequencing_run_name}")
         return PacBioMetrics(
             read=read_metrics,
             control=control_metrics,
diff --git a/cg/services/run_devices/pacbio/post_processing_service.py b/cg/services/run_devices/pacbio/post_processing_service.py
index 53fcb55e3b..35ebda2cd3 100644
--- a/cg/services/run_devices/pacbio/post_processing_service.py
+++ b/cg/services/run_devices/pacbio/post_processing_service.py
@@ -1,6 +1,8 @@
 import logging
+from pathlib import Path
 
 from cg.services.run_devices.abstract_classes import PostProcessingService
+from cg.services.run_devices.constants import POST_PROCESSING_COMPLETED
 from cg.services.run_devices.error_handler import handle_post_processing_errors
 from cg.services.run_devices.exc import (
     PostProcessingError,
@@ -49,11 +51,16 @@ def __init__(
         to_raise=PostProcessingError,
     )
     def post_process(self, run_name: str, dry_run: bool = False) -> None:
-        LOG.info(f"Starting PacBio post-processing for run: {run_name}")
+        LOG.info(f"Starting Pacbio post-processing for run: {run_name}")
         run_data: PacBioRunData = self.run_data_generator.get_run_data(
             run_name=run_name, sequencing_dir=self.sequencing_dir
         )
         self.run_validator.ensure_post_processing_can_start(run_data)
         self.store_service.store_post_processing_data(run_data=run_data, dry_run=dry_run)
         self.hk_service.store_files_in_housekeeper(run_data=run_data, dry_run=dry_run)
-        self._touch_post_processing_complete(run_data)
+        self._touch_post_processing_complete(run_data=run_data, dry_run=dry_run)
+
+    def is_run_processed(self, run_name: str) -> bool:
+        """Check if a run has been post-processed."""
+        processing_complete_file = Path(self.sequencing_dir, run_name, POST_PROCESSING_COMPLETED)
+        return processing_complete_file.exists()
diff --git a/cg/services/run_devices/pacbio/run_validator/pacbio_run_validator.py b/cg/services/run_devices/pacbio/run_validator/pacbio_run_validator.py
index 22aa4a3a35..06a266af99 100644
--- a/cg/services/run_devices/pacbio/run_validator/pacbio_run_validator.py
+++ b/cg/services/run_devices/pacbio/run_validator/pacbio_run_validator.py
@@ -1,5 +1,6 @@
-from pathlib import Path
 import logging
+from pathlib import Path
+
 from cg.constants.constants import FileFormat
 from cg.constants.pacbio import PacBioDirsAndFiles
 from cg.services.decompression_service.decompressor import Decompressor
@@ -49,7 +50,6 @@ def ensure_post_processing_can_start(self, run_data: PacBioRunData) -> None:
             source_dir=run_data.full_path,
             manifest_file_format=FileFormat.TXT,
         )
-
         self.decompressor.decompress(
             source_path=paths_information.decompression_target,
             destination_path=paths_information.decompression_destination,
@@ -57,10 +57,10 @@ def ensure_post_processing_can_start(self, run_data: PacBioRunData) -> None:
         self._touch_is_validated(run_data.full_path)
         LOG.debug(f"Run for {run_data.full_path} is validated.")
 
-    @staticmethod
-    def _touch_is_validated(run_path: Path):
-        Path(run_path, PacBioDirsAndFiles.RUN_IS_VALID).touch()
-
     @staticmethod
     def _is_validated(run_path: Path) -> bool:
         return Path(run_path, PacBioDirsAndFiles.RUN_IS_VALID).exists()
+
+    @staticmethod
+    def _touch_is_validated(run_path: Path) -> None:
+        Path(run_path, PacBioDirsAndFiles.RUN_IS_VALID).touch()
diff --git a/cg/services/run_devices/run_names/__init__.py b/cg/services/run_devices/run_names/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cg/services/run_devices/run_names/pacbio.py b/cg/services/run_devices/run_names/pacbio.py
new file mode 100644
index 0000000000..14b24b52eb
--- /dev/null
+++ b/cg/services/run_devices/run_names/pacbio.py
@@ -0,0 +1,20 @@
+from pathlib import Path
+
+from cg.services.run_devices.run_names.service import RunNamesService
+
+
+class PacbioRunNamesService(RunNamesService):
+
+    def get_run_names(self) -> list[str]:
+        """
+        Get all the run names from the PacBio sequencing directory in the form
+        <sequencing_run>/<SMRTcell>, for example:
+            r84202_20240913_121403/1_C01
+        """
+
+        run_names = []
+        for run_folder in Path(self.run_directory).iterdir():
+            if run_folder.is_dir():
+                for cell_folder in run_folder.iterdir():
+                    run_names.append(f"{run_folder.name}/{cell_folder.name}")
+        return run_names
diff --git a/cg/services/run_devices/run_names/service.py b/cg/services/run_devices/run_names/service.py
new file mode 100644
index 0000000000..8122345f11
--- /dev/null
+++ b/cg/services/run_devices/run_names/service.py
@@ -0,0 +1,12 @@
+from abc import ABC, abstractmethod
+
+
+class RunNamesService(ABC):
+
+    def __init__(self, run_directory: str):
+        self.run_directory = run_directory
+
+    @abstractmethod
+    def get_run_names(self) -> list[str]:
+        """Get all the run names from a run directory."""
+        pass
diff --git a/cg/services/validate_file_transfer_service/validate_file_transfer_service.py b/cg/services/validate_file_transfer_service/validate_file_transfer_service.py
index a0046ca1e5..61e7e54dfa 100644
--- a/cg/services/validate_file_transfer_service/validate_file_transfer_service.py
+++ b/cg/services/validate_file_transfer_service/validate_file_transfer_service.py
@@ -1,8 +1,11 @@
+import logging
 from pathlib import Path
 
 from cg.io.controller import ReadFile
 from cg.utils.files import get_file_in_directory, get_files_in_directory_with_pattern
 
+LOG = logging.getLogger(__name__)
+
 
 class ValidateFileTransferService:
     """Service to validate file transfers via a manifest file."""
@@ -20,6 +23,7 @@ def validate_file_transfer(
             raise FileNotFoundError(
                 f"Not all files listed in the manifest file {manifest_file} are present in the directory tree {source_dir}"
             )
+        LOG.debug("File transfer validated")
         return
 
     @staticmethod
diff --git a/cg/store/crud/create.py b/cg/store/crud/create.py
index 91c0d040c7..83d19c3cb9 100644
--- a/cg/store/crud/create.py
+++ b/cg/store/crud/create.py
@@ -479,6 +479,7 @@ def add_illumina_sample_metrics_entry(
         return new_metric
 
     def create_pac_bio_smrt_cell(self, run_device_dto: PacBioSMRTCellDTO) -> PacbioSMRTCell:
+        LOG.debug(f"Creating Pacbio SMRT cell for {run_device_dto.internal_id}")
         if self.get_pac_bio_smrt_cell_by_internal_id(run_device_dto.internal_id):
             raise ValueError(f"SMRT cell with {run_device_dto.internal_id} already exists.")
         new_smrt_cell = PacbioSMRTCell(
@@ -490,6 +491,7 @@ def create_pac_bio_smrt_cell(self, run_device_dto: PacBioSMRTCellDTO) -> PacbioS
     def create_pac_bio_sequencing_run(
         self, sequencing_run_dto: PacBioSequencingRunDTO, smrt_cell: PacbioSMRTCell
     ) -> PacbioSequencingRun:
+        LOG.debug(f"Creating Pacbio sequencing run for SMRT cell {smrt_cell.internal_id}")
         new_sequencing_run = PacbioSequencingRun(
             type=sequencing_run_dto.type,
             well=sequencing_run_dto.well,
diff --git a/tests/cli/post_process/test_utils.py b/tests/cli/post_process/test_utils.py
index a505936495..9db3d83d99 100644
--- a/tests/cli/post_process/test_utils.py
+++ b/tests/cli/post_process/test_utils.py
@@ -1,6 +1,10 @@
 import pytest
 
-from cg.cli.post_process.utils import get_post_processing_service_from_run_name
+from cg.cli.post_process.utils import (
+    UnprocessedRunInfo,
+    get_post_processing_service_from_run_name,
+    get_unprocessed_runs_info,
+)
 from cg.models.cg_config import CGConfig
 from cg.services.run_devices.pacbio.post_processing_service import PacBioPostProcessingService
 
@@ -43,3 +47,32 @@ def test_get_post_processing_service_from_wrong_run_name(
         get_post_processing_service_from_run_name(context=pac_bio_context, run_name=wrong_run_name)
 
     # THEN an error is raised
+
+
+def test_get_unprocessed_runs_info_pacbio(
+    pac_bio_context: CGConfig,
+    pacbio_barcoded_sequencing_run_name: str,
+    pacbio_sequencing_run_name: str,
+):
+    """Test that a list of unprocessed runs is returned for Pacbio."""
+    # GIVEN a context with a post-processing service for Pacbio
+    assert pac_bio_context.post_processing_services.pacbio
+    instrument: str = "pacbio"
+
+    # GIVEN that there are unprocessed run in the directory
+    expected_run_names: set[str] = {pacbio_sequencing_run_name, pacbio_barcoded_sequencing_run_name}
+
+    # GIVEN that there is an already processed run in the directory
+    number_of_runs: int = len(pac_bio_context.run_names_services.pacbio.get_run_names())
+    number_unprocessed: int = len(expected_run_names)
+    assert number_of_runs > number_unprocessed
+
+    # WHEN getting the unprocessed runs
+    unprocessed_runs: list[UnprocessedRunInfo] = get_unprocessed_runs_info(
+        context=pac_bio_context, instrument=instrument
+    )
+
+    # THEN the expected unprocessed runs are returned
+    assert unprocessed_runs
+    unprocessed_run_names: set[str] = {run.name for run in unprocessed_runs}
+    assert unprocessed_run_names == expected_run_names
diff --git a/tests/conftest.py b/tests/conftest.py
index 780fcea443..4fc03bfeed 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -130,6 +130,7 @@
     "tests.fixture_plugins.pacbio_fixtures.path_fixtures",
     "tests.fixture_plugins.pacbio_fixtures.run_data_fixtures",
     "tests.fixture_plugins.pacbio_fixtures.service_fixtures",
+    "tests.fixture_plugins.pacbio_fixtures.unprocessed_runs_fixtures",
     "tests.fixture_plugins.quality_controller_fixtures.sequencing_qc_check_scenario",
     "tests.fixture_plugins.quality_controller_fixtures.sequencing_qc_fixtures",
     "tests.fixture_plugins.timestamp_fixtures",
diff --git a/tests/fixture_plugins/device_fixtures.py b/tests/fixture_plugins/device_fixtures.py
index 44977fc244..08dd87ce0c 100644
--- a/tests/fixture_plugins/device_fixtures.py
+++ b/tests/fixture_plugins/device_fixtures.py
@@ -2,8 +2,9 @@
 
 import pytest
 
-from cg.models.cg_config import PostProcessingServices
+from cg.models.cg_config import PostProcessingServices, RunNamesServices
 from cg.services.run_devices.pacbio.post_processing_service import PacBioPostProcessingService
+from cg.services.run_devices.run_names.pacbio import PacbioRunNamesService
 
 
 @pytest.fixture
@@ -11,6 +12,10 @@ def post_processing_services(
     pac_bio_post_processing_service: PacBioPostProcessingService,
 ) -> PostProcessingServices:
     """Return the post processing services."""
-    return PostProcessingServices(
-        pacbio=pac_bio_post_processing_service,
-    )
+    return PostProcessingServices(pacbio=pac_bio_post_processing_service)
+
+
+@pytest.fixture
+def run_names_services(pacbio_run_names_service: PacbioRunNamesService) -> RunNamesServices:
+    """Return the run names services."""
+    return RunNamesServices(pacbio=pacbio_run_names_service)
diff --git a/tests/fixture_plugins/pacbio_fixtures/context_fixtures.py b/tests/fixture_plugins/pacbio_fixtures/context_fixtures.py
index f7216386f5..f816537466 100644
--- a/tests/fixture_plugins/pacbio_fixtures/context_fixtures.py
+++ b/tests/fixture_plugins/pacbio_fixtures/context_fixtures.py
@@ -1,11 +1,14 @@
 import pytest
 
-from cg.models.cg_config import CGConfig, PostProcessingServices
+from cg.models.cg_config import CGConfig, PostProcessingServices, RunNamesServices
 
 
 @pytest.fixture
 def pac_bio_context(
-    cg_context: CGConfig, post_processing_services: PostProcessingServices
+    cg_context: CGConfig,
+    post_processing_services: PostProcessingServices,
+    run_names_services: RunNamesServices,
 ) -> CGConfig:
     cg_context.post_processing_services_ = post_processing_services
+    cg_context.run_names_services_ = run_names_services
     return cg_context
diff --git a/tests/fixture_plugins/pacbio_fixtures/name_fixtures.py b/tests/fixture_plugins/pacbio_fixtures/name_fixtures.py
index c11e019312..4317f19b48 100644
--- a/tests/fixture_plugins/pacbio_fixtures/name_fixtures.py
+++ b/tests/fixture_plugins/pacbio_fixtures/name_fixtures.py
@@ -24,6 +24,12 @@ def pacbio_barcoded_run_name() -> str:
     return "r84202_20240913_121403"
 
 
+@pytest.fixture
+def pacbio_sequencing_run_name(pac_bio_test_run_name: str, pac_bio_smrt_cell_name: str) -> str:
+    """Return the name of a PacBio SMRT cell."""
+    return f"{pac_bio_test_run_name}/{pac_bio_smrt_cell_name}"
+
+
 @pytest.fixture
 def pacbio_barcoded_sequencing_run_name(
     pacbio_barcoded_run_name: str, pacbio_barcoded_smrt_cell_name: str
@@ -32,6 +38,27 @@ def pacbio_barcoded_sequencing_run_name(
     return f"{pacbio_barcoded_run_name}/{pacbio_barcoded_smrt_cell_name}"
 
 
+@pytest.fixture
+def pacbio_processed_sequencing_run_name(
+    pacbio_barcoded_run_name: str, pac_bio_smrt_cell_name: str
+) -> str:
+    """Return the name of a PacBio SMRT cell."""
+    return f"{pacbio_barcoded_run_name}/{pac_bio_smrt_cell_name}"
+
+
+@pytest.fixture
+def pacbio_run_names(
+    pacbio_sequencing_run_name: str,
+    pacbio_barcoded_sequencing_run_name: str,
+    pacbio_processed_sequencing_run_name: str,
+) -> set[str]:
+    return {
+        pacbio_sequencing_run_name,
+        pacbio_barcoded_sequencing_run_name,
+        pacbio_processed_sequencing_run_name,
+    }
+
+
 @pytest.fixture
 def pacbio_barcoded_1_c01_cell_full_name() -> str:
     """Return the full name of a PacBio SMRT cell."""
diff --git a/tests/fixture_plugins/pacbio_fixtures/service_fixtures.py b/tests/fixture_plugins/pacbio_fixtures/service_fixtures.py
index 43da23757b..6449fbe1e0 100644
--- a/tests/fixture_plugins/pacbio_fixtures/service_fixtures.py
+++ b/tests/fixture_plugins/pacbio_fixtures/service_fixtures.py
@@ -1,5 +1,6 @@
 """Module for PacBio fixtures returning service objects."""
 
+from pathlib import Path
 from unittest.mock import Mock
 
 import pytest
@@ -20,8 +21,24 @@
     PacBioRunDataGenerator,
 )
 from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager
+from cg.services.run_devices.pacbio.run_validator.pacbio_run_validator import PacBioRunValidator
+from cg.services.run_devices.run_names.pacbio import PacbioRunNamesService
 from cg.store.store import Store
 
+# Mocked services
+
+
+@pytest.fixture
+def mock_pacbio_run_validator() -> PacBioRunValidator:
+    return PacBioRunValidator(
+        decompressor=Mock(),
+        file_transfer_validator=Mock(),
+        file_manager=Mock(),
+    )
+
+
+# Real services
+
 
 @pytest.fixture
 def pac_bio_run_data_generator() -> PacBioRunDataGenerator:
@@ -33,6 +50,11 @@ def pac_bio_run_file_manager() -> PacBioRunFileManager:
     return PacBioRunFileManager()
 
 
+@pytest.fixture
+def pacbio_run_names_service(pac_bio_runs_dir: Path) -> PacbioRunNamesService:
+    return PacbioRunNamesService(run_directory=pac_bio_runs_dir.as_posix())
+
+
 @pytest.fixture
 def pac_bio_metrics_parser(pac_bio_run_file_manager: PacBioRunFileManager) -> PacBioMetricsParser:
     return PacBioMetricsParser(file_manager=pac_bio_run_file_manager)
@@ -58,14 +80,14 @@ def pac_bio_post_processing_service(
     pac_bio_run_data_generator: PacBioRunDataGenerator,
     pac_bio_housekeeper_service: PacBioHousekeeperService,
     pac_bio_store_service: PacBioStoreService,
-    pacbio_barcoded_sequencing_run_name: str,
+    pac_bio_runs_dir: Path,
 ) -> PacBioPostProcessingService:
     return PacBioPostProcessingService(
         run_validator=Mock(),
         run_data_generator=pac_bio_run_data_generator,
         hk_service=pac_bio_housekeeper_service,
         store_service=pac_bio_store_service,
-        sequencing_dir=pacbio_barcoded_sequencing_run_name,
+        sequencing_dir=pac_bio_runs_dir.as_posix(),
     )
 
 
diff --git a/tests/fixture_plugins/pacbio_fixtures/unprocessed_runs_fixtures.py b/tests/fixture_plugins/pacbio_fixtures/unprocessed_runs_fixtures.py
new file mode 100644
index 0000000000..880a0d4e75
--- /dev/null
+++ b/tests/fixture_plugins/pacbio_fixtures/unprocessed_runs_fixtures.py
@@ -0,0 +1,35 @@
+import pytest
+
+from cg.cli.post_process.utils import UnprocessedRunInfo
+from cg.services.run_devices.pacbio.post_processing_service import PacBioPostProcessingService
+
+
+@pytest.fixture
+def pacbio_unprocessed_run_info(
+    pacbio_sequencing_run_name: str, pac_bio_post_processing_service: PacBioPostProcessingService
+) -> UnprocessedRunInfo:
+    return UnprocessedRunInfo(
+        name=pacbio_sequencing_run_name,
+        post_processing_service=pac_bio_post_processing_service,
+        instrument="pacbio",
+    )
+
+
+@pytest.fixture
+def pacbio_barcoded_unprocessed_run_info(
+    pacbio_barcoded_sequencing_run_name: str,
+    pac_bio_post_processing_service: PacBioPostProcessingService,
+) -> UnprocessedRunInfo:
+    return UnprocessedRunInfo(
+        name=pacbio_barcoded_sequencing_run_name,
+        post_processing_service=pac_bio_post_processing_service,
+        instrument="pacbio",
+    )
+
+
+@pytest.fixture
+def pacbio_unprocessed_runs(
+    pacbio_unprocessed_run_info: UnprocessedRunInfo,
+    pacbio_barcoded_unprocessed_run_info: UnprocessedRunInfo,
+) -> list[UnprocessedRunInfo]:
+    return [pacbio_unprocessed_run_info, pacbio_barcoded_unprocessed_run_info]
diff --git a/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240913_121403/1_B01/post_processing_completed b/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240913_121403/1_B01/post_processing_completed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/services/run_devices/pacbio/run_validator/test_pac_bio_run_validator.py b/tests/services/run_devices/pacbio/run_validator/test_pac_bio_run_validator.py
index 4b9aa29036..6707c2c6f5 100644
--- a/tests/services/run_devices/pacbio/run_validator/test_pac_bio_run_validator.py
+++ b/tests/services/run_devices/pacbio/run_validator/test_pac_bio_run_validator.py
@@ -1,55 +1,50 @@
 from pathlib import Path
-from unittest.mock import Mock
 
 from cg.constants.pacbio import PacBioDirsAndFiles
 from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData
 from cg.services.run_devices.pacbio.run_validator.pacbio_run_validator import PacBioRunValidator
 
 
-def test_pac_bio_run_validator(tmp_path: Path):
+def test_pacbio_run_validator(tmp_path: Path, mock_pacbio_run_validator: PacBioRunValidator):
+    """Test that the run validator flow works as expected."""
     # GIVEN run data and a run validator
     run_data = PacBioRunData(
-        full_path=Path(tmp_path),
+        full_path=tmp_path,
         sequencing_run_name="not_relevant",
         well_name="not_relevant",
         plate=1,
     )
-    run_validator = PacBioRunValidator(
-        decompressor=Mock(),
-        file_transfer_validator=Mock(),
-        file_manager=Mock(),
-    )
 
-    # WHEN ensuring post processing can start
-    run_validator.ensure_post_processing_can_start(run_data)
+    # WHEN ensuring post-processing can start
+    mock_pacbio_run_validator.ensure_post_processing_can_start(run_data)
 
     # THEN the run is validated
-    assert run_validator.file_manager.get_run_validation_files.call_count == 1
-    assert run_validator.file_transfer_validator.validate_file_transfer.call_count == 1
-    assert run_validator.decompressor.decompress.call_count == 1
+    assert mock_pacbio_run_validator.file_manager.get_run_validation_files.call_count == 1
+    assert mock_pacbio_run_validator.file_transfer_validator.validate_file_transfer.call_count == 1
+    assert mock_pacbio_run_validator.decompressor.decompress.call_count == 1
     assert Path(run_data.full_path, PacBioDirsAndFiles.RUN_IS_VALID).exists()
 
 
-def test_pac_bio_run_validator_skip_if_validated(tmp_path: Path):
+def test_pacbio_run_validator_skip_if_validated(
+    tmp_path: Path, mock_pacbio_run_validator: PacBioRunValidator
+):
+    """Test that the run validator skips validation if the run is already validated."""
     # GIVEN run data and a run validator
     run_data = PacBioRunData(
-        full_path=Path(tmp_path),
+        full_path=tmp_path,
         sequencing_run_name="not_relevant",
         well_name="not_relevant",
         plate=1,
     )
-    run_validator = PacBioRunValidator(
-        decompressor=Mock(),
-        file_transfer_validator=Mock(),
-        file_manager=Mock(),
-    )
-    run_validator._touch_is_validated(run_data.full_path)
 
-    # WHEN ensuring post processing can start
-    run_validator.ensure_post_processing_can_start(run_data)
+    # GIVEN that the run is already validated
+    mock_pacbio_run_validator._touch_is_validated(run_data.full_path)
 
-    # THEN the run is validated
+    # WHEN ensuring post-processing can start
+    mock_pacbio_run_validator.ensure_post_processing_can_start(run_data)
+
+    # THEN the validation flow is skipped as the run is already validated
     assert Path(run_data.full_path, PacBioDirsAndFiles.RUN_IS_VALID).exists()
-    assert run_validator.file_manager.get_run_validation_files.call_count == 0
-    assert run_validator.file_transfer_validator.validate_file_transfer.call_count == 0
-    assert run_validator.decompressor.decompress.call_count == 0
+    assert mock_pacbio_run_validator.file_manager.get_run_validation_files.call_count == 0
+    assert mock_pacbio_run_validator.file_transfer_validator.validate_file_transfer.call_count == 0
+    assert mock_pacbio_run_validator.decompressor.decompress.call_count == 0
diff --git a/tests/services/run_names/test_pacbio_run_names_service.py b/tests/services/run_names/test_pacbio_run_names_service.py
new file mode 100644
index 0000000000..ce7136e97d
--- /dev/null
+++ b/tests/services/run_names/test_pacbio_run_names_service.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+
+from cg.services.run_devices.run_names.pacbio import PacbioRunNamesService
+
+
+def test_pacbio_get_run_names(
+    pacbio_run_names_service: PacbioRunNamesService, pacbio_run_names: set
+):
+    """Test that getting PacBio run names works."""
+    # GIVEN a PacBio run names service
+
+    # WHEN getting the run names
+    run_names: list[str] = pacbio_run_names_service.get_run_names()
+
+    # THEN the run names are returned
+    assert set(run_names) == pacbio_run_names
+
+
+def test_pacbio_get_run_names_empty(tmp_path: Path):
+    # GIVEN a run directory with no run folders and a PacBio run names service
+    pacbio_run_names_service: PacbioRunNamesService = PacbioRunNamesService(
+        run_directory=tmp_path.as_posix()
+    )
+
+    # WHEN getting the run names
+    run_names: list[str] = pacbio_run_names_service.get_run_names()
+
+    # THEN no run names are returned
+    assert run_names == []
+
+
+def test_pacbio_get_run_names_with_files_return_emty(tmp_path: Path):
+    # GIVEN a run directory with files and a PacBio run names service
+    run_dir = Path(tmp_path, "run")
+    run_dir.mkdir()
+    Path(run_dir, "file1").touch()
+    Path(run_dir, "file2").touch()
+    pacbio_run_names_service: PacbioRunNamesService = PacbioRunNamesService(
+        run_directory=run_dir.as_posix()
+    )
+
+    # WHEN getting the run names
+    run_names: list[str] = pacbio_run_names_service.get_run_names()
+
+    # THEN no run names are returned
+    assert run_names == []
+
+
+def test_pacbio_get_run_names_with_subfolders_return_empty(tmp_path: Path):
+    # GIVEN a run directory with an empty subfolder and a PacBio run names service
+    run_dir = Path(tmp_path, "run")
+    run_dir.mkdir()
+    subfolder = Path(run_dir, "subfolder")
+    subfolder.mkdir()
+    pacbio_run_names_service: PacbioRunNamesService = PacbioRunNamesService(
+        run_directory=run_dir.as_posix()
+    )
+
+    # WHEN getting the run names
+    run_names: list[str] = pacbio_run_names_service.get_run_names()
+
+    # THEN no run names are returned
+    assert run_names == []