pha4ge
diff --git a/‎.github/workflows/test_package.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/test_package.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎hAMRonization/AbricateIO.py
Lines changed: 20 additions & 21 deletions b/‎hAMRonization/AbricateIO.py
Lines changed: 20 additions & 21 deletions
diff --git a/‎hAMRonization/AmrFinderPlusIO.py
Lines changed: 75 additions & 71 deletions b/‎hAMRonization/AmrFinderPlusIO.py
Lines changed: 75 additions & 71 deletions
diff --git a/‎hAMRonization/AmrPlusPlusIO.py
Lines changed: 26 additions & 23 deletions b/‎hAMRonization/AmrPlusPlusIO.py
Lines changed: 26 additions & 23 deletions
@@ -34,7 +34,7 @@ jobs:
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        flake8 . --count --exit-zero --max-complexity=20 --max-line-length=127 --statistics
         popd
     - name: Run sanity tests
       run: |
 
@@ -4,33 +4,32 @@
 from .Interfaces import hAMRonizedResultIterator
 from hAMRonization.constants import GENE_PRESENCE
 
-required_metadata = ['analysis_software_version',
-                     'reference_database_version']
+required_metadata = ["analysis_software_version", "reference_database_version"]
 
 
 class AbricateIterator(hAMRonizedResultIterator):
-
     def __init__(self, source, metadata):
-        metadata['analysis_software_name'] = 'abricate'
-        metadata['genetic_variation_type'] = GENE_PRESENCE
+        metadata["analysis_software_name"] = "abricate"
+        metadata["genetic_variation_type"] = GENE_PRESENCE
         self.metadata = metadata
 
         self.field_mapping = {
-                '#FILE': 'input_file_name',
-                'SEQUENCE': 'input_sequence_id',
-                'START': 'input_gene_start',
-                'END': 'input_gene_stop',
-                'STRAND': 'strand_orientation',
-                'GENE': 'gene_symbol',
-                'PRODUCT': 'gene_name',
-                '%COVERAGE': 'coverage_percentage',
-                'COVERAGE': None,
-                '%IDENTITY': 'sequence_identity',
-                'DATABASE': 'reference_database_name',
-                'ACCESSION': 'reference_accession',
-                'RESISTANCE': 'drug_class',
-                'COVERAGE_MAP': None,
-                'GAPS': None}
+            "#FILE": "input_file_name",
+            "SEQUENCE": "input_sequence_id",
+            "START": "input_gene_start",
+            "END": "input_gene_stop",
+            "STRAND": "strand_orientation",
+            "GENE": "gene_symbol",
+            "PRODUCT": "gene_name",
+            "%COVERAGE": "coverage_percentage",
+            "COVERAGE": None,
+            "%IDENTITY": "sequence_identity",
+            "DATABASE": "reference_database_name",
+            "ACCESSION": "reference_accession",
+            "RESISTANCE": "drug_class",
+            "COVERAGE_MAP": None,
+            "GAPS": None,
+        }
 
         super().__init__(source, self.field_mapping, self.metadata)
 
@@ -39,6 +38,6 @@ def parse(self, handle):
         Read each and return it
         """
         # skip any manually specified fields for later
-        reader = csv.DictReader(handle, delimiter='\t')
+        reader = csv.DictReader(handle, delimiter="\t")
         for result in reader:
             yield self.hAMRonize(result, self.metadata)
@@ -2,78 +2,83 @@
 
 import csv
 import re
-import warnings
 from .Interfaces import hAMRonizedResultIterator
-from hAMRonization.constants import NUCLEOTIDE_VARIANT, AMINO_ACID_VARIANT, GENE_PRESENCE
+from hAMRonization.constants import (
+    NUCLEOTIDE_VARIANT,
+    AMINO_ACID_VARIANT,
+    GENE_PRESENCE,
+)
 
-required_metadata = ['analysis_software_version',
-                     'reference_database_version',
-                     'input_file_name']
+required_metadata = [
+    "analysis_software_version",
+    "reference_database_version",
+    "input_file_name",
+]
 
 
 class AmrFinderPlusIterator(hAMRonizedResultIterator):
-
     def __init__(self, source, metadata):
-        metadata['analysis_software_name'] = 'amrfinderplus'
-        metadata['reference_database_name'] = 'NCBI Reference Gene Database'
+        metadata["analysis_software_name"] = "amrfinderplus"
+        metadata["reference_database_name"] = "NCBI Reference Gene Database"
         self.metadata = metadata
 
         # check source for whether AMFP has been run in protein or nt mode
 
         nucleotide_field_mapping = {
-                    'Protein identifier': None,
-                    'Contig id': 'input_sequence_id',
-                    'Start': 'input_gene_start',
-                    'Stop': 'input_gene_stop',
-                    'Strand': 'strand_orientation',
-                    'Gene symbol': 'gene_symbol',
-                    'Sequence name': 'gene_name',
-                    'Scope': None,
-                    'Element type': None,
-                    'Element subtype': None,
-                    'Class': 'drug_class',
-                    'Subclass': 'antimicrobial_agent',
-                    'Method': None,
-                    'Target length': 'input_protein_length',
-                    'Reference sequence length': 'reference_protein_length',
-                    '% Coverage of reference sequence': 'coverage_percentage',
-                    '% Identity to reference sequence': 'sequence_identity',
-                    'Alignment length': None,
-                    'Accession of closest sequence': 'reference_accession',
-                    'Name of closest sequence': None,
-                    'HMM id': None,
-                    'HMM description': None,
-                    'AA Mutation': 'amino_acid_mutation',
-                    'Nucleotide Mutation': 'nucleotide_mutation',
-                    'genetic_variation_type': 'genetic_variation_type'
-               }
-        protein_field_mapping = {'Protein identifier': 'input_sequence_id',
-                    'Gene symbol': 'gene_symbol',
-                    'Sequence name': 'gene_name',
-                    'Scope': None,
-                    'Element': None,
-                    'Element subtype': None,
-                    'Class': 'drug_class',
-                    'Subclass': 'antimicrobial_agent',
-                    'Method': None,
-                    'Target length': 'input_protein_length',
-                    'Reference sequence length': 'reference_protein_length',
-                    '% Coverage of reference sequence': 'coverage_percentage',
-                    '% Identity to reference sequence': 'sequence_identity',
-                    'Alignment length': None,
-                    'Accession of closest sequence': 'reference_accession',
-                    'Name of closest sequence': None,
-                    'HMM id': None,
-                    'HMM description': None,
-                    'AA Mutation': 'amino_acid_mutation',
-                    'genetic_variation_type': 'genetic_variation_type'
-                    }
+            "Protein identifier": None,
+            "Contig id": "input_sequence_id",
+            "Start": "input_gene_start",
+            "Stop": "input_gene_stop",
+            "Strand": "strand_orientation",
+            "Gene symbol": "gene_symbol",
+            "Sequence name": "gene_name",
+            "Scope": None,
+            "Element type": None,
+            "Element subtype": None,
+            "Class": "drug_class",
+            "Subclass": "antimicrobial_agent",
+            "Method": None,
+            "Target length": "input_protein_length",
+            "Reference sequence length": "reference_protein_length",
+            "% Coverage of reference sequence": "coverage_percentage",
+            "% Identity to reference sequence": "sequence_identity",
+            "Alignment length": None,
+            "Accession of closest sequence": "reference_accession",
+            "Name of closest sequence": None,
+            "HMM id": None,
+            "HMM description": None,
+            "AA Mutation": "amino_acid_mutation",
+            "Nucleotide Mutation": "nucleotide_mutation",
+            "genetic_variation_type": "genetic_variation_type",
+        }
+        protein_field_mapping = {
+            "Protein identifier": "input_sequence_id",
+            "Gene symbol": "gene_symbol",
+            "Sequence name": "gene_name",
+            "Scope": None,
+            "Element": None,
+            "Element subtype": None,
+            "Class": "drug_class",
+            "Subclass": "antimicrobial_agent",
+            "Method": None,
+            "Target length": "input_protein_length",
+            "Reference sequence length": "reference_protein_length",
+            "% Coverage of reference sequence": "coverage_percentage",
+            "% Identity to reference sequence": "sequence_identity",
+            "Alignment length": None,
+            "Accession of closest sequence": "reference_accession",
+            "Name of closest sequence": None,
+            "HMM id": None,
+            "HMM description": None,
+            "AA Mutation": "amino_acid_mutation",
+            "genetic_variation_type": "genetic_variation_type",
+        }
 
         with open(source) as fh:
-            header = next(fh)
+            _ = next(fh)
             try:
                 first_result = next(fh)
-                if first_result.strip().split('\t')[0] == 'NA':
+                if first_result.strip().split("\t")[0] == "NA":
                     self.field_mapping = nucleotide_field_mapping
                 else:
                     self.field_mapping = protein_field_mapping
@@ -84,12 +89,11 @@ def __init__(self, source, metadata):
 
         super().__init__(source, self.field_mapping, self.metadata)
 
-
     def parse(self, handle):
         """
         Read each and return it
         """
-        reader = csv.DictReader(handle, delimiter='\t')
+        reader = csv.DictReader(handle, delimiter="\t")
         for result in reader:
             # replace NA value with None for consistency
             for field, value in result.items():
@@ -99,21 +103,21 @@ def parse(self, handle):
             # "POINT" indicates mutational resistance
             # amrfinderplus has no special fields but the mutation itself is
             # appended to the symbol name so we want to split this
-            result['AA Mutation'] = None
-            result['Nucleotide Mutation'] = None
-            result['genetic_variation_type'] = GENE_PRESENCE
+            result["AA Mutation"] = None
+            result["Nucleotide Mutation"] = None
+            result["genetic_variation_type"] = GENE_PRESENCE
 
-            if result['Element subtype'] == 'POINT':
-                gene_symbol, mutation = result["Gene symbol"].rsplit('_', 1)
-                result['Gene symbol'] = gene_symbol
+            if result["Element subtype"] == "POINT":
+                gene_symbol, mutation = result["Gene symbol"].rsplit("_", 1)
+                result["Gene symbol"] = gene_symbol
                 _, ref, pos, alt, _ = re.split(r"(\D+)(\d+)(\D+)", mutation)
                 # this means it is a protein mutation
-                if result['Method'] in ['POINTX', 'POINTP']:
-                    result['AA Mutation'] = f"p.{ref}{pos}{alt}"
-                    result['genetic_variation_type'] = AMINO_ACID_VARIANT
-                elif result['Method'] == 'POINTN':
+                if result["Method"] in ["POINTX", "POINTP"]:
+                    result["AA Mutation"] = f"p.{ref}{pos}{alt}"
+                    result["genetic_variation_type"] = AMINO_ACID_VARIANT
+                elif result["Method"] == "POINTN":
                     # e.g., 23S_G2032G ampC_C-11C -> c.2032G>G
-                    result['Nucleotide Mutation'] = f"c.{pos}{ref}>{alt}"
-                    result['genetic_variation_type'] = NUCLEOTIDE_VARIANT
+                    result["Nucleotide Mutation"] = f"c.{pos}{ref}>{alt}"
+                    result["genetic_variation_type"] = NUCLEOTIDE_VARIANT
 
             yield self.hAMRonize(result, self.metadata)
@@ -4,29 +4,31 @@
 from .Interfaces import hAMRonizedResultIterator
 from hAMRonization.constants import GENE_PRESENCE
 
-required_metadata = ['analysis_software_version',
-                     'reference_database_version',
-                     'input_file_name']
+required_metadata = [
+    "analysis_software_version",
+    "reference_database_version",
+    "input_file_name",
+]
 
 
 class AmrPlusPlusIterator(hAMRonizedResultIterator):
-
     def __init__(self, source, metadata):
-        metadata['analysis_software_name'] = 'amrplusplus'
-        metadata['reference_database_name'] = 'megares'
-        metadata['genetic_variation_type'] = GENE_PRESENCE
+        metadata["analysis_software_name"] = "amrplusplus"
+        metadata["reference_database_name"] = "megares"
+        metadata["genetic_variation_type"] = GENE_PRESENCE
 
         self.metadata = metadata
         self.field_mapping = {
-                # Sample  Gene    Hits    Gene Fraction
-                "Sample": "input_file_name",
-                "Gene": None,
-                'Gene Fraction': 'coverage_percentage',
-                # following will be extacted from gene
-                "_reference_accession": "reference_accession",
-                "_gene_name": "gene_name",
-                "_gene_symbol": "gene_symbol",
-                "_drug_class": "drug_class"}
+            # Sample  Gene    Hits    Gene Fraction
+            "Sample": "input_file_name",
+            "Gene": None,
+            "Gene Fraction": "coverage_percentage",
+            # following will be extracted from gene
+            "_reference_accession": "reference_accession",
+            "_gene_name": "gene_name",
+            "_gene_symbol": "gene_symbol",
+            "_drug_class": "drug_class",
+        }
 
         super().__init__(source, self.field_mapping, self.metadata)
 
@@ -35,12 +37,13 @@ def parse(self, handle):
         Read each and return it
         """
         # skip any manually specified fields for later
-        reader = csv.DictReader(handle, delimiter='\t')
+        reader = csv.DictReader(handle, delimiter="\t")
         for result in reader:
-            hit_information = result['Gene'].\
-                    replace('|RequiresSNPConfirmation', '').split('|')
-            result['_reference_accession'] = hit_information[0]
-            result['_drug_class'] = hit_information[2]
-            result['_gene_symbol'] = hit_information[-1]
-            result['_gene_name'] = hit_information[-2]
+            hit_information = (
+                result["Gene"].replace("|RequiresSNPConfirmation", "").split("|")
+            )
+            result["_reference_accession"] = hit_information[0]
+            result["_drug_class"] = hit_information[2]
+            result["_gene_symbol"] = hit_information[-1]
+            result["_gene_name"] = hit_information[-2]
             yield self.hAMRonize(result, self.metadata)