Skip to content

Commit 1c2d5fb

Browse files
committed
Fix linting
1 parent 25ec386 commit 1c2d5fb

24 files changed

+894
-771
lines changed

.github/workflows/test_package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
# stop the build if there are Python syntax errors or undefined names
3535
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
3636
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37-
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
37+
flake8 . --count --exit-zero --max-complexity=20 --max-line-length=127 --statistics
3838
popd
3939
- name: Run sanity tests
4040
run: |

hAMRonization/AbricateIO.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,32 @@
44
from .Interfaces import hAMRonizedResultIterator
55
from hAMRonization.constants import GENE_PRESENCE
66

7-
required_metadata = ['analysis_software_version',
8-
'reference_database_version']
7+
required_metadata = ["analysis_software_version", "reference_database_version"]
98

109

1110
class AbricateIterator(hAMRonizedResultIterator):
12-
1311
def __init__(self, source, metadata):
14-
metadata['analysis_software_name'] = 'abricate'
15-
metadata['genetic_variation_type'] = GENE_PRESENCE
12+
metadata["analysis_software_name"] = "abricate"
13+
metadata["genetic_variation_type"] = GENE_PRESENCE
1614
self.metadata = metadata
1715

1816
self.field_mapping = {
19-
'#FILE': 'input_file_name',
20-
'SEQUENCE': 'input_sequence_id',
21-
'START': 'input_gene_start',
22-
'END': 'input_gene_stop',
23-
'STRAND': 'strand_orientation',
24-
'GENE': 'gene_symbol',
25-
'PRODUCT': 'gene_name',
26-
'%COVERAGE': 'coverage_percentage',
27-
'COVERAGE': None,
28-
'%IDENTITY': 'sequence_identity',
29-
'DATABASE': 'reference_database_name',
30-
'ACCESSION': 'reference_accession',
31-
'RESISTANCE': 'drug_class',
32-
'COVERAGE_MAP': None,
33-
'GAPS': None}
17+
"#FILE": "input_file_name",
18+
"SEQUENCE": "input_sequence_id",
19+
"START": "input_gene_start",
20+
"END": "input_gene_stop",
21+
"STRAND": "strand_orientation",
22+
"GENE": "gene_symbol",
23+
"PRODUCT": "gene_name",
24+
"%COVERAGE": "coverage_percentage",
25+
"COVERAGE": None,
26+
"%IDENTITY": "sequence_identity",
27+
"DATABASE": "reference_database_name",
28+
"ACCESSION": "reference_accession",
29+
"RESISTANCE": "drug_class",
30+
"COVERAGE_MAP": None,
31+
"GAPS": None,
32+
}
3433

3534
super().__init__(source, self.field_mapping, self.metadata)
3635

@@ -39,6 +38,6 @@ def parse(self, handle):
3938
Read each and return it
4039
"""
4140
# skip any manually specified fields for later
42-
reader = csv.DictReader(handle, delimiter='\t')
41+
reader = csv.DictReader(handle, delimiter="\t")
4342
for result in reader:
4443
yield self.hAMRonize(result, self.metadata)

hAMRonization/AmrFinderPlusIO.py

Lines changed: 75 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2,78 +2,83 @@
22

33
import csv
44
import re
5-
import warnings
65
from .Interfaces import hAMRonizedResultIterator
7-
from hAMRonization.constants import NUCLEOTIDE_VARIANT, AMINO_ACID_VARIANT, GENE_PRESENCE
6+
from hAMRonization.constants import (
7+
NUCLEOTIDE_VARIANT,
8+
AMINO_ACID_VARIANT,
9+
GENE_PRESENCE,
10+
)
811

9-
required_metadata = ['analysis_software_version',
10-
'reference_database_version',
11-
'input_file_name']
12+
required_metadata = [
13+
"analysis_software_version",
14+
"reference_database_version",
15+
"input_file_name",
16+
]
1217

1318

1419
class AmrFinderPlusIterator(hAMRonizedResultIterator):
15-
1620
def __init__(self, source, metadata):
17-
metadata['analysis_software_name'] = 'amrfinderplus'
18-
metadata['reference_database_name'] = 'NCBI Reference Gene Database'
21+
metadata["analysis_software_name"] = "amrfinderplus"
22+
metadata["reference_database_name"] = "NCBI Reference Gene Database"
1923
self.metadata = metadata
2024

2125
# check source for whether AMFP has been run in protein or nt mode
2226

2327
nucleotide_field_mapping = {
24-
'Protein identifier': None,
25-
'Contig id': 'input_sequence_id',
26-
'Start': 'input_gene_start',
27-
'Stop': 'input_gene_stop',
28-
'Strand': 'strand_orientation',
29-
'Gene symbol': 'gene_symbol',
30-
'Sequence name': 'gene_name',
31-
'Scope': None,
32-
'Element type': None,
33-
'Element subtype': None,
34-
'Class': 'drug_class',
35-
'Subclass': 'antimicrobial_agent',
36-
'Method': None,
37-
'Target length': 'input_protein_length',
38-
'Reference sequence length': 'reference_protein_length',
39-
'% Coverage of reference sequence': 'coverage_percentage',
40-
'% Identity to reference sequence': 'sequence_identity',
41-
'Alignment length': None,
42-
'Accession of closest sequence': 'reference_accession',
43-
'Name of closest sequence': None,
44-
'HMM id': None,
45-
'HMM description': None,
46-
'AA Mutation': 'amino_acid_mutation',
47-
'Nucleotide Mutation': 'nucleotide_mutation',
48-
'genetic_variation_type': 'genetic_variation_type'
49-
}
50-
protein_field_mapping = {'Protein identifier': 'input_sequence_id',
51-
'Gene symbol': 'gene_symbol',
52-
'Sequence name': 'gene_name',
53-
'Scope': None,
54-
'Element': None,
55-
'Element subtype': None,
56-
'Class': 'drug_class',
57-
'Subclass': 'antimicrobial_agent',
58-
'Method': None,
59-
'Target length': 'input_protein_length',
60-
'Reference sequence length': 'reference_protein_length',
61-
'% Coverage of reference sequence': 'coverage_percentage',
62-
'% Identity to reference sequence': 'sequence_identity',
63-
'Alignment length': None,
64-
'Accession of closest sequence': 'reference_accession',
65-
'Name of closest sequence': None,
66-
'HMM id': None,
67-
'HMM description': None,
68-
'AA Mutation': 'amino_acid_mutation',
69-
'genetic_variation_type': 'genetic_variation_type'
70-
}
28+
"Protein identifier": None,
29+
"Contig id": "input_sequence_id",
30+
"Start": "input_gene_start",
31+
"Stop": "input_gene_stop",
32+
"Strand": "strand_orientation",
33+
"Gene symbol": "gene_symbol",
34+
"Sequence name": "gene_name",
35+
"Scope": None,
36+
"Element type": None,
37+
"Element subtype": None,
38+
"Class": "drug_class",
39+
"Subclass": "antimicrobial_agent",
40+
"Method": None,
41+
"Target length": "input_protein_length",
42+
"Reference sequence length": "reference_protein_length",
43+
"% Coverage of reference sequence": "coverage_percentage",
44+
"% Identity to reference sequence": "sequence_identity",
45+
"Alignment length": None,
46+
"Accession of closest sequence": "reference_accession",
47+
"Name of closest sequence": None,
48+
"HMM id": None,
49+
"HMM description": None,
50+
"AA Mutation": "amino_acid_mutation",
51+
"Nucleotide Mutation": "nucleotide_mutation",
52+
"genetic_variation_type": "genetic_variation_type",
53+
}
54+
protein_field_mapping = {
55+
"Protein identifier": "input_sequence_id",
56+
"Gene symbol": "gene_symbol",
57+
"Sequence name": "gene_name",
58+
"Scope": None,
59+
"Element": None,
60+
"Element subtype": None,
61+
"Class": "drug_class",
62+
"Subclass": "antimicrobial_agent",
63+
"Method": None,
64+
"Target length": "input_protein_length",
65+
"Reference sequence length": "reference_protein_length",
66+
"% Coverage of reference sequence": "coverage_percentage",
67+
"% Identity to reference sequence": "sequence_identity",
68+
"Alignment length": None,
69+
"Accession of closest sequence": "reference_accession",
70+
"Name of closest sequence": None,
71+
"HMM id": None,
72+
"HMM description": None,
73+
"AA Mutation": "amino_acid_mutation",
74+
"genetic_variation_type": "genetic_variation_type",
75+
}
7176

7277
with open(source) as fh:
73-
header = next(fh)
78+
_ = next(fh)
7479
try:
7580
first_result = next(fh)
76-
if first_result.strip().split('\t')[0] == 'NA':
81+
if first_result.strip().split("\t")[0] == "NA":
7782
self.field_mapping = nucleotide_field_mapping
7883
else:
7984
self.field_mapping = protein_field_mapping
@@ -84,12 +89,11 @@ def __init__(self, source, metadata):
8489

8590
super().__init__(source, self.field_mapping, self.metadata)
8691

87-
8892
def parse(self, handle):
8993
"""
9094
Read each and return it
9195
"""
92-
reader = csv.DictReader(handle, delimiter='\t')
96+
reader = csv.DictReader(handle, delimiter="\t")
9397
for result in reader:
9498
# replace NA value with None for consistency
9599
for field, value in result.items():
@@ -99,21 +103,21 @@ def parse(self, handle):
99103
# "POINT" indicates mutational resistance
100104
# amrfinderplus has no special fields but the mutation itself is
101105
# appended to the symbol name so we want to split this
102-
result['AA Mutation'] = None
103-
result['Nucleotide Mutation'] = None
104-
result['genetic_variation_type'] = GENE_PRESENCE
106+
result["AA Mutation"] = None
107+
result["Nucleotide Mutation"] = None
108+
result["genetic_variation_type"] = GENE_PRESENCE
105109

106-
if result['Element subtype'] == 'POINT':
107-
gene_symbol, mutation = result["Gene symbol"].rsplit('_', 1)
108-
result['Gene symbol'] = gene_symbol
110+
if result["Element subtype"] == "POINT":
111+
gene_symbol, mutation = result["Gene symbol"].rsplit("_", 1)
112+
result["Gene symbol"] = gene_symbol
109113
_, ref, pos, alt, _ = re.split(r"(\D+)(\d+)(\D+)", mutation)
110114
# this means it is a protein mutation
111-
if result['Method'] in ['POINTX', 'POINTP']:
112-
result['AA Mutation'] = f"p.{ref}{pos}{alt}"
113-
result['genetic_variation_type'] = AMINO_ACID_VARIANT
114-
elif result['Method'] == 'POINTN':
115+
if result["Method"] in ["POINTX", "POINTP"]:
116+
result["AA Mutation"] = f"p.{ref}{pos}{alt}"
117+
result["genetic_variation_type"] = AMINO_ACID_VARIANT
118+
elif result["Method"] == "POINTN":
115119
# e.g., 23S_G2032G ampC_C-11C -> c.2032G>G
116-
result['Nucleotide Mutation'] = f"c.{pos}{ref}>{alt}"
117-
result['genetic_variation_type'] = NUCLEOTIDE_VARIANT
120+
result["Nucleotide Mutation"] = f"c.{pos}{ref}>{alt}"
121+
result["genetic_variation_type"] = NUCLEOTIDE_VARIANT
118122

119123
yield self.hAMRonize(result, self.metadata)

hAMRonization/AmrPlusPlusIO.py

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,31 @@
44
from .Interfaces import hAMRonizedResultIterator
55
from hAMRonization.constants import GENE_PRESENCE
66

7-
required_metadata = ['analysis_software_version',
8-
'reference_database_version',
9-
'input_file_name']
7+
required_metadata = [
8+
"analysis_software_version",
9+
"reference_database_version",
10+
"input_file_name",
11+
]
1012

1113

1214
class AmrPlusPlusIterator(hAMRonizedResultIterator):
13-
1415
def __init__(self, source, metadata):
15-
metadata['analysis_software_name'] = 'amrplusplus'
16-
metadata['reference_database_name'] = 'megares'
17-
metadata['genetic_variation_type'] = GENE_PRESENCE
16+
metadata["analysis_software_name"] = "amrplusplus"
17+
metadata["reference_database_name"] = "megares"
18+
metadata["genetic_variation_type"] = GENE_PRESENCE
1819

1920
self.metadata = metadata
2021
self.field_mapping = {
21-
# Sample Gene Hits Gene Fraction
22-
"Sample": "input_file_name",
23-
"Gene": None,
24-
'Gene Fraction': 'coverage_percentage',
25-
# following will be extracted from gene
26-
"_reference_accession": "reference_accession",
27-
"_gene_name": "gene_name",
28-
"_gene_symbol": "gene_symbol",
29-
"_drug_class": "drug_class"}
22+
# Sample Gene Hits Gene Fraction
23+
"Sample": "input_file_name",
24+
"Gene": None,
25+
"Gene Fraction": "coverage_percentage",
26+
# following will be extracted from gene
27+
"_reference_accession": "reference_accession",
28+
"_gene_name": "gene_name",
29+
"_gene_symbol": "gene_symbol",
30+
"_drug_class": "drug_class",
31+
}
3032

3133
super().__init__(source, self.field_mapping, self.metadata)
3234

@@ -35,12 +37,13 @@ def parse(self, handle):
3537
Read each and return it
3638
"""
3739
# skip any manually specified fields for later
38-
reader = csv.DictReader(handle, delimiter='\t')
40+
reader = csv.DictReader(handle, delimiter="\t")
3941
for result in reader:
40-
hit_information = result['Gene'].\
41-
replace('|RequiresSNPConfirmation', '').split('|')
42-
result['_reference_accession'] = hit_information[0]
43-
result['_drug_class'] = hit_information[2]
44-
result['_gene_symbol'] = hit_information[-1]
45-
result['_gene_name'] = hit_information[-2]
42+
hit_information = (
43+
result["Gene"].replace("|RequiresSNPConfirmation", "").split("|")
44+
)
45+
result["_reference_accession"] = hit_information[0]
46+
result["_drug_class"] = hit_information[2]
47+
result["_gene_symbol"] = hit_information[-1]
48+
result["_gene_name"] = hit_information[-2]
4649
yield self.hAMRonize(result, self.metadata)

0 commit comments

Comments
 (0)