diff --git a/mibios/omics/managers.py b/mibios/omics/managers.py index 05bc0d33..4713f04c 100644 --- a/mibios/omics/managers.py +++ b/mibios/omics/managers.py @@ -2,6 +2,7 @@ Module for data load managers """ from collections import defaultdict +from contextlib import ExitStack from datetime import date from functools import partial from itertools import groupby, islice @@ -702,46 +703,51 @@ def update_from_pipeline_registry( (SUCCESS, 'import_success'), ) - # setup log file output - if log_dir := getattr(settings, 'IMPORT_LOG_DIR', ''): - # find a unique log file name and set up the log handler - path = Path(log_dir) / 'omics_status_update' - today = date.today() - suffix = f'.{today}.log' - num = 0 - while path.with_suffix(suffix).exists(): - num += 1 - suffix = f'.{today}.{num}.log' - - log_file = path.with_suffix(suffix).open('w') - print(f'Logging to: {log_file.name}') - else: - log_file = sys.stdout + # ExitStack: To cleanly close log file (if needed) and input file as + # there is some interaction with the atomic_dry wrapper + with ExitStack() as estack: + + # setup log file output + if log_dir := getattr(settings, 'IMPORT_LOG_DIR', ''): + # find a unique log file name and set up the log handler + log_file_base = Path(log_dir) / 'omics_status_update' + today = date.today() + suffix = f'.{today}.log' + num = 0 + while (log_file := log_file_base.with_suffix(suffix)).exists(): + num += 1 + suffix = f'.{today}.{num}.log' + + log_file = estack.enter_context(log_file.open('w')) + print(f'Logging to: {log_file.name}') + else: + log_file = sys.stdout - def log(msg): - if quiet and msg.startswith('INFO'): - return - print(msg, file=log_file) + def log(msg): + if quiet and msg.startswith('INFO'): + return + print(msg, file=log_file) - def err(msg): - msg = f'ERROR: {msg}' - if skip_on_error: - log(msg) - else: - raise RuntimeError(msg) + def err(msg): + msg = f'ERROR: {msg}' + if skip_on_error: + log(msg) + else: + raise RuntimeError(msg) - objs = self.select_related('dataset').in_bulk(field_name='sample_id') + objs = self.select_related('dataset') \ + .in_bulk(field_name='sample_id') - if source_file is None: - source_file = self.get_omics_import_file() + if source_file is None: + source_file = self.get_omics_import_file() - with open(source_file) as f: print(f'Reading {source_file} ...') - head = f.readline().rstrip('\n').split('\t') + srcf = estack.enter_context(open(source_file)) + head = srcf.readline().rstrip('\n').split('\t') for index, column in COLUMN_NAMES: if head[index] != column: raise RuntimeError( - f'unexpected header in {f.name}: 0-based column ' + f'unexpected header in {srcf.name}: 0-based column ' f'{index} is "{head[index]}" but expected "{column}"' ) @@ -751,7 +757,7 @@ def err(msg): unchanged = 0 notfound = 0 nosuccess = 0 - for lineno, line in enumerate(f, start=1): + for lineno, line in enumerate(srcf, start=1): row = line.rstrip('\n').split('\t') sample_id = row[SAMPLE_ID] dataset = row[STUDY_ID] @@ -827,30 +833,33 @@ def err(msg): if not new: tr.save() # update timestamp - log('Summary:') - log(f' records read from file: {lineno}') - log(f' (unique) samples listed: {len(samp_id_seen)}') - log(f' samples updated: {changed}') - if notfound: - log(f'WARNING Samples missing from database (or hidden): ' - f'{notfound}') - - if nosuccess: - log(f'WARNING Samples not marked "import_success": {nosuccess}') - - if unchanged: - log(f'Data for {unchanged} listed samples remain unchanged.') - - missing_or_bad = self.exclude(pk__in=good_seen) - if missing_or_bad.exists(): - log(f'WARNING The DB has {missing_or_bad.count()} samples ' - f'which are missing from {source_file} or which had ' - f'to be skipped for other reasons.') - - missing = self.exclude(sample_id__in=samp_id_seen) - if missing.exists(): - log(f'WARNING The DB has {missing.count()} samples not at all ' - f'listed in {source_file}') + srcf.close() + + log('Summary:') + log(f' records read from file: {lineno}') + log(f' (unique) samples listed: {len(samp_id_seen)}') + log(f' samples updated: {changed}') + if notfound: + log(f'WARNING Samples missing from database (or hidden): ' + f'{notfound}') + + if nosuccess: + log(f'WARNING Samples not marked "import_success": ' + f'{nosuccess}') + + if unchanged: + log(f'Data for {unchanged} listed samples remain unchanged.') + + missing_or_bad = self.exclude(pk__in=good_seen) + if missing_or_bad.exists(): + log(f'WARNING The DB has {missing_or_bad.count()} samples ' + f'which are missing from {source_file} or which had ' + f'to be skipped for other reasons.') + + missing = self.exclude(sample_id__in=samp_id_seen) + if missing.exists(): + log(f'WARNING The DB has {missing.count()} samples not at all ' + f'listed in {source_file}') def get_metagenomic_loader_script(self): """