Skip to content

Commit 0551e36

Browse files
committed
add BatchCheckFileSuffixTask and tests (#2064)
1 parent a46d49c commit 0551e36

File tree

5 files changed

+215
-2
lines changed

5 files changed

+215
-2
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ Added
1616
- ``PROJECTROLES_SUPPORT_CONTACT`` setting support (#2095)
1717
- **Landingzones**
1818
- Site read-only mode support (#2051)
19+
- ``file_name_prohibit`` app setting (#2064)
1920
- **Samplesheets**
2021
- Site read-only mode support (#2051)
2122
- **Taskflowbackend**
2223
- Project deletion support (#2051)
2324
- Zone validation and moving progress indicators (#2024)
2425
- ``TASKFLOW_ZONE_PROGRESS_INTERVAL`` Django setting (#2024)
26+
- ``BatchCheckFileSuffixTask`` iRODS task (#2064)
2527

2628
Changed
2729
-------
@@ -36,6 +38,7 @@ Changed
3638
- Define app settings as ``PluginAppSettingDef`` objects (#2051)
3739
- **Taskflowbackend**
3840
- Enable no role for old owner in ``perform_owner_transfer()`` (#2051)
41+
- Rename ``BatchCheckFileTask`` to ``BatchCheckFileExistTask`` (#2064)
3942

4043
Fixed
4144
-----

docs_manual/source/sodar_release_notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ v1.1.0 (WIP)
1414
Release for SODAR Core 1.1 upgrade and feature updates.
1515

1616
- Add landing zone validation and moving progress indicators
17+
- Add landing zone file type prohibiting
1718
- Upgrade to python-irodsclient v3.0
1819
- Upgrade to SODAR Core v1.1.2
1920
- Replace SODAR_SUPPORT_* settings with PROJECTROLES_SUPPORT_CONTACT

landingzones/plugins.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
user_modifiable=True,
6363
),
6464
PluginAppSettingDef(
65-
name='file_type_prohibit',
65+
name='file_name_prohibit',
6666
scope=APP_SETTING_SCOPE_PROJECT,
6767
type=APP_SETTING_TYPE_STRING,
6868
default='',

taskflowbackend/tasks/irods_tasks.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
INHERIT_STRINGS = {True: 'inherit', False: 'noinherit'}
4040
META_EMPTY_VALUE = 'N/A'
4141
MD5_RE = re.compile(r'([^\w.])')
42+
SUFFIX_CLEAN_RE = re.compile(r'\A\W+|\W+\Z')
4243
CHECKSUM_RETRY = 5
4344
NO_FILE_CHECKSUM_LABEL = 'None'
4445

@@ -614,7 +615,36 @@ def revert(
614615
self.revert_set_access(path, user_name, obj_target, recursive)
615616

616617

617-
class BatchCheckFilesTask(IrodsBaseTask):
618+
class BatchCheckFileSuffixTask(IrodsBaseTask):
    """Batch check for prohibited file name suffixes"""

    def execute(self, file_paths, suffixes, zone_path, *args, **kwargs):
        """
        Fail the task if any of the given paths ends with a prohibited suffix.

        Matching is case-insensitive and anchored on a leading dot, so the
        suffix "bam" matches "x.bam" but not "xbam". Leading and trailing
        non-word characters in a suffix are ignored, which makes "bam",
        ".bam", "*.bam" and " bam " equivalent.

        :param file_paths: Paths of files to check (list of strings)
        :param suffixes: Prohibited file name suffixes (list of strings)
        :param zone_path: Path whose "<zone_path>/" prefix is removed from
                          the paths listed in the error message (string)
        """
        # Normalize before filtering, so that variants such as " md5 " or
        # "md5." are recognized by the md5 exclusion below
        cleaned = (
            SUFFIX_CLEAN_RE.sub('', s.lower().strip()) for s in suffixes
        )
        # Skip entries left empty by the cleanup ('', '*', '*.*'): they would
        # otherwise degrade into a bare '.' and flag every file name ending in
        # a dot. Suffixes ending in "md5" are never treated as prohibited.
        # NOTE(review): Presumably because .md5 checksum files are expected
        #               alongside the data files; confirm against callers.
        dot_suffixes = tuple(
            '.' + s for s in cleaned if s and not s.endswith('md5')
        )
        # str.endswith() accepts a tuple; an empty tuple never matches
        err_paths = [
            p for p in file_paths if p.lower().endswith(dot_suffixes)
        ]
        if err_paths:
            err_len = len(err_paths)
            msg = '{} file{} of prohibited type found: {}'.format(
                err_len,
                's' if err_len != 1 else '',
                ';'.join([p.replace(zone_path + '/', '') for p in err_paths]),
            )
            logger.error(msg)
            self._raise_irods_exception(Exception(), msg)
        super().execute(*args, **kwargs)

    def revert(self, file_paths, suffixes, zone_path, *args, **kwargs):
        """Revert the task: no-op, as execute() only inspects path strings"""
        pass  # Nothing to revert
645+
646+
647+
class BatchCheckFileExistTask(IrodsBaseTask):
618648
"""
619649
Batch check for existence of files and corresponding .md5 checksum files
620650
"""

taskflowbackend/tests/test_tasks.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ZONE_TITLE,
2222
ZONE_DESC,
2323
)
24+
from landingzones.tests.test_views_taskflow import LandingZoneTaskflowMixin
2425

2526
# Samplesheets dependency
2627
from samplesheets.tests.test_io import SampleSheetIOMixin, SHEET_DIR
@@ -67,6 +68,10 @@
6768
BATCH_OBJ_NAME = 'batch_obj'
6869
BATCH_OBJ2_NAME = 'batch_obj2'
6970

71+
SUFFIX_OBJ_NAME_BAM = 'test.bam'
72+
SUFFIX_OBJ_NAME_VCF = 'test.vcf.gz'
73+
SUFFIX_OBJ_NAME_TXT = ' test.txt'
74+
7075

7176
class IRODSTaskTestBase(TaskflowViewTestBase):
7277
"""Base test class for iRODS tasks"""
@@ -1868,6 +1873,180 @@ def test_revert_mixed(self):
18681873
self.assert_irods_access(DEFAULT_USER_GROUP, self.sub_coll_path2, None)
18691874

18701875

1876+
class TestBatchCheckFileSuffixTask(
    SampleSheetIOMixin,
    LandingZoneMixin,
    LandingZoneTaskflowMixin,
    IRODSTaskTestBase,
):
    """Tests for BatchCheckFileSuffixTask"""

    def _run_with_suffixes(self, suffixes):
        """
        Add the task with given prohibited suffixes and run the flow.

        :param suffixes: Value injected as the "suffixes" task argument
        :return: Return value of self.run_flow()
        """
        self.task_kw['inject']['suffixes'] = suffixes
        self.add_task(**self.task_kw)
        return self.run_flow()

    def _assert_prohibited(self, suffixes, expected):
        """
        Run the flow expecting a failure and check which files are reported.

        :param suffixes: Value injected as the "suffixes" task argument
        :param expected: Names of test objects which must appear in the
                         exception message; the remaining test objects must
                         not appear in it (list of strings)
        """
        with self.assertRaises(Exception) as cm:
            self._run_with_suffixes(suffixes)
        # NOTE: These assertions must come after the with block, as any
        #       statement following the raising call inside it would never
        #       be executed. Membership is checked against the message text,
        #       as a plain Exception object does not support the "in"
        #       operator.
        msg = str(cm.exception)
        obj_names = (
            SUFFIX_OBJ_NAME_BAM,
            SUFFIX_OBJ_NAME_VCF,
            SUFFIX_OBJ_NAME_TXT,
        )
        for name in obj_names:
            if name in expected:
                self.assertIn(name, msg)
            else:
                self.assertNotIn(name, msg)

    def setUp(self):
        super().setUp()
        self.investigation = self.import_isa_from_file(SHEET_PATH, self.project)
        self.study = self.investigation.studies.first()
        self.assay = self.study.assays.first()
        self.zone = self.make_landing_zone(
            title=ZONE_TITLE,
            project=self.project,
            user=self.user,
            assay=self.assay,
            description=ZONE_DESC,
        )
        self.make_zone_taskflow(zone=self.zone)
        self.zone_path = self.irods_backend.get_path(self.zone)
        self.zone_coll = self.irods.collections.get(self.zone_path)
        # One object per file type used in the tests
        self.obj_bam = self.make_irods_object(
            self.zone_coll, SUFFIX_OBJ_NAME_BAM
        )
        self.obj_vcf = self.make_irods_object(
            self.zone_coll, SUFFIX_OBJ_NAME_VCF
        )
        self.obj_txt = self.make_irods_object(
            self.zone_coll, SUFFIX_OBJ_NAME_TXT
        )
        self.obj_paths = [
            self.obj_bam.path,
            self.obj_vcf.path,
            self.obj_txt.path,
        ]
        # The "suffixes" inject value is set by each test case
        self.task_kw = {
            'cls': BatchCheckFileSuffixTask,
            'name': 'Check file suffixes',
            'inject': {
                'file_paths': self.obj_paths,
                'zone_path': self.zone_path,
            },
        }

    def test_check_bam(self):
        """Test batch file suffix check with prohibited BAM type"""
        self._assert_prohibited(['bam'], [SUFFIX_OBJ_NAME_BAM])

    def test_check_vcf(self):
        """Test check with prohibited VCF type"""
        self._assert_prohibited(['vcf.gz'], [SUFFIX_OBJ_NAME_VCF])

    def test_check_multiple(self):
        """Test check with multiple prohibited types"""
        self._assert_prohibited(
            ['bam', 'vcf.gz'], [SUFFIX_OBJ_NAME_BAM, SUFFIX_OBJ_NAME_VCF]
        )

    def test_check_multiple_not_found(self):
        """Test check with multiple types not found in files"""
        result = self._run_with_suffixes(['mp3', 'rar'])
        self.assertEqual(result, True)

    def test_check_empty_list(self):
        """Test check with empty prohibition list"""
        result = self._run_with_suffixes([])
        self.assertEqual(result, True)

    def test_check_notation_dot(self):
        """Test check with dot notation in list"""
        self._assert_prohibited(['.bam'], [SUFFIX_OBJ_NAME_BAM])

    def test_check_notation_asterisk(self):
        """Test check with asterisk notation in list"""
        self._assert_prohibited(['*bam'], [SUFFIX_OBJ_NAME_BAM])

    def test_check_notation_combined(self):
        """Test check with combined notation in list"""
        self._assert_prohibited(['*.bam'], [SUFFIX_OBJ_NAME_BAM])

    def test_check_extra_spaces(self):
        """Test check with extra spaces"""
        self._assert_prohibited([' bam '], [SUFFIX_OBJ_NAME_BAM])

    def test_check_not_end_of_file(self):
        """Test check with given string not in end of file name"""
        result = self._run_with_suffixes(['test'])
        self.assertEqual(result, True)

    def test_check_upper_case(self):
        """Test check with upper case string"""
        self._assert_prohibited(['BAM'], [SUFFIX_OBJ_NAME_BAM])

    def test_check_invalid_strings(self):
        """Test check with invalid strings"""
        result = self._run_with_suffixes(['', '*', '*.*'])
        self.assertEqual(result, True)

    def test_check_invalid_valid(self):
        """Test check with mixed invalid and valid strings"""
        self._assert_prohibited(['', '*', 'bam'], [SUFFIX_OBJ_NAME_BAM])
2048+
2049+
18712050
class TestBatchCreateCollectionsTask(IRODSTaskTestBase):
18722051
"""Tests for BatchCreateCollectionsTask"""
18732052

0 commit comments

Comments
 (0)