1
1
from pathlib import Path
2
2
3
3
from cg .constants import FileExtensions
4
- from cg .constants .pacbio import PacBioDirsAndFiles
4
+ from cg .constants .pacbio import PacBioDirsAndFiles , MANIFEST_FILE_PATTERN , ZIPPED_REPORTS_PATTERN
5
5
from cg .services .run_devices .abstract_classes import RunFileManager
6
6
from cg .services .run_devices .error_handler import handle_post_processing_errors
7
7
from cg .services .run_devices .exc import PostProcessingRunFileManagerError
8
8
from cg .services .run_devices .pacbio .run_data_generator .run_data import PacBioRunData
9
+ from cg .services .run_devices .pacbio .run_file_manager .models import PacBioRunValidatorFiles
9
10
from cg .services .run_devices .validators import validate_files_or_directories_exist
10
11
from cg .utils .files import get_files_matching_pattern
11
12
@@ -29,10 +30,22 @@ def get_files_to_store(self, run_data: PacBioRunData) -> list[Path]:
29
30
run_path : Path = run_data .full_path
30
31
return self .get_files_to_parse (run_data ) + self ._get_hifi_read_files (run_path )
31
32
32
- @staticmethod
33
- def _get_ccs_report_file (run_path : Path ) -> Path :
33
@handle_post_processing_errors(
    to_except=(FileNotFoundError,), to_raise=PostProcessingRunFileManagerError
)
def get_run_validation_files(self, run_data: PacBioRunData) -> PacBioRunValidatorFiles:
    """Collect the paths used to validate a PacBio run.

    The returned model bundles the manifest file, the zipped reports file
    (the decompression target) and the directory the reports are unzipped
    into (the decompression destination), all resolved from the full path
    of the run.

    A FileNotFoundError raised while locating the files is passed to
    handle_post_processing_errors, which is configured here with
    PostProcessingRunFileManagerError as the error to raise.
    """
    run_path: Path = run_data.full_path
    # Keyword arguments are evaluated in order: manifest, zipped reports, unzipped dir.
    return PacBioRunValidatorFiles(
        manifest_file=self._get_manifest_file(run_path),
        decompression_target=self._get_zipped_reports_file(run_path),
        decompression_destination=self._get_unzipped_reports_dir(run_path),
    )
45
+
46
+ def _get_ccs_report_file (self , run_path : Path ) -> Path :
34
47
"""Return the path to the CCS report file."""
35
- statistics_dir : Path = Path (run_path , PacBioDirsAndFiles . STATISTICS_DIR )
48
+ statistics_dir : Path = self . _get_statistics_dir (run_path )
36
49
files : list [Path ] = get_files_matching_pattern (
37
50
directory = statistics_dir , pattern = f"*{ PacBioDirsAndFiles .CCS_REPORT_SUFFIX } "
38
51
)
@@ -42,9 +55,7 @@ def _get_ccs_report_file(run_path: Path) -> Path:
42
55
43
56
def _get_report_files (self , run_path : Path ) -> list [Path ]:
44
57
"""Return the paths to the unzipped report files."""
45
- unzipped_dir : Path = Path (
46
- run_path , PacBioDirsAndFiles .STATISTICS_DIR , PacBioDirsAndFiles .UNZIPPED_REPORTS_DIR
47
- )
58
+ unzipped_dir : Path = self ._get_unzipped_reports_dir (run_path )
48
59
report_files : list [Path ] = [
49
60
Path (unzipped_dir , PacBioDirsAndFiles .CONTROL_REPORT ),
50
61
Path (unzipped_dir , PacBioDirsAndFiles .LOADING_REPORT ),
@@ -64,3 +75,28 @@ def _get_hifi_read_files(run_path: Path) -> list[Path]:
64
75
)
65
76
validate_files_or_directories_exist (bam_files )
66
77
return bam_files
78
+
79
@staticmethod
def _get_unzipped_reports_dir(run_path) -> Path:
    """Return the path to the unzipped reports directory inside the statistics directory."""
    statistics_dir = Path(run_path) / PacBioDirsAndFiles.STATISTICS_DIR
    return statistics_dir / PacBioDirsAndFiles.UNZIPPED_REPORTS_DIR
84
+
85
@staticmethod
def _get_statistics_dir(run_path) -> Path:
    """Return the path to the statistics directory of the run."""
    return Path(run_path) / PacBioDirsAndFiles.STATISTICS_DIR
88
+
89
@staticmethod
def _get_manifest_file(run_path) -> Path:
    """Return the first manifest file found in the metadata directory of the run.

    Raises:
        FileNotFoundError: if no file in the metadata directory matches
            MANIFEST_FILE_PATTERN.
    """
    metadata_dir: Path = Path(run_path, PacBioDirsAndFiles.METADATA_DIR)
    matches: list[Path] = get_files_matching_pattern(
        directory=metadata_dir, pattern=MANIFEST_FILE_PATTERN
    )
    if matches:
        return matches[0]
    raise FileNotFoundError(f"No Manifest file found in {run_path} ")
97
+
98
def _get_zipped_reports_file(self, run_path: Path) -> Path:
    """Return the zipped reports file found in the statistics directory of the run.

    Args:
        run_path: Root directory of the run.

    Returns:
        The first file in the statistics directory matching ZIPPED_REPORTS_PATTERN.

    Raises:
        FileNotFoundError: if no file in the statistics directory matches the pattern.
    """
    statistics_dir: Path = self._get_statistics_dir(run_path)
    zipped_reports: list[Path] = get_files_matching_pattern(
        directory=statistics_dir, pattern=ZIPPED_REPORTS_PATTERN
    )
    if not zipped_reports:
        # Indexing an empty list would raise IndexError. Raise FileNotFoundError
        # instead, matching how a missing manifest file is reported, so callers
        # that handle FileNotFoundError also cover a missing reports archive.
        raise FileNotFoundError(f"No zipped reports file found in {statistics_dir}")
    return zipped_reports[0]
0 commit comments