From 36557bafbe4591114c9edda7576d5a7c0e74e9cf Mon Sep 17 00:00:00 2001 From: Finlay Maguire Date: Tue, 21 May 2024 17:24:56 -0300 Subject: [PATCH] Add tests for resfinderv4.5 fastq output resolves #86 --- .../resfinder_inputfastq_inputfasta.txt | 2 ++ .../resfinder/resfinder_inputfasta_tab.txt | 15 +++++++++++ .../resfinder_inputfastq_inputfasta.txt | 14 ++++++++++ .../resfinder/resfinder_inputfastq_tab.txt | 14 ++++++++++ test/run_integration_test.sh | 13 +++++++-- test/test_parsing_validity.py | 27 ++++++++++++++----- 6 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 test/data/dummy/resfinder/resfinder_inputfastq_inputfasta.txt create mode 100644 test/data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt create mode 100644 test/data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt create mode 100644 test/data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt diff --git a/test/data/dummy/resfinder/resfinder_inputfastq_inputfasta.txt b/test/data/dummy/resfinder/resfinder_inputfastq_inputfasta.txt new file mode 100644 index 0000000..77a1a72 --- /dev/null +++ b/test/data/dummy/resfinder/resfinder_inputfastq_inputfasta.txt @@ -0,0 +1,2 @@ +Resistance gene Identity Alignment Length/Gene Length Coverage Position in reference Contig Position in contig Phenotype Accession no. +aph(6)-Id 100.00 837/837 100.0 1..838 NA NA..NA Streptomycin M28829 diff --git a/test/data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt b/test/data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt new file mode 100644 index 0000000..e0861c9 --- /dev/null +++ b/test/data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt @@ -0,0 +1,15 @@ +Resistance gene Identity Alignment Length/Gene Length Coverage Position in reference Contig Position in contig Phenotype Accession no. 
+aph(6)-Id 100.00 837/837 100.0 1..837 CCI165_S85_contig_59 length 7808 coverage 252.0 normalized_cov 1.38 2438..3274 Streptomycin M28829 +aph(3'')-Ib 100.00 803/804 99.87562189054727 2..804 CCI165_S85_contig_59 length 7808 coverage 252.0 normalized_cov 1.38 3274..4076 Streptomycin AF024602 +aph(3'')-Ib 99.88 804/804 100.0 1..804 CCI165_S85_contig_59 length 7808 coverage 252.0 normalized_cov 1.38 3274..4077 Streptomycin AF321551 +aph(3'')-Ib 99.88 804/804 100.0 1..804 CCI165_S85_contig_59 length 7808 coverage 252.0 normalized_cov 1.38 3274..4077 Streptomycin AF313472 +aph(3'')-Ib 99.88 804/804 100.0 1..804 CCI165_S85_contig_59 length 7808 coverage 252.0 normalized_cov 1.38 3274..4077 Streptomycin AF321550 +aadA1 100.00 792/792 100.0 1..792 CCI165_S85_contig_74 length 3262 coverage 232.9 normalized_cov 1.28 451..1242 Spectinomycin, Streptomycin JX185132 +blaOKP-B-17 100.00 860/861 99.88385598141696 1..860 CCI165_S85_contig_2 length 348456 coverage 150.2 normalized_cov 0.82 263329..264188 Unknown Beta-lactam AM850919 +fosA 93.81 420/420 100.0 1..420 CCI165_S85_contig_13 length 151149 coverage 198.6 normalized_cov 1.09 127705..128124 Fosfomycin ACZD01000244 +OqxB 95.40 3153/3153 100.0 1..3153 CCI165_S85_contig_8 length 181163 coverage 173.9 normalized_cov 0.95 45285..48437 Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +OqxA 93.62 1176/1176 100.0 1..1176 CCI165_S85_contig_8 length 181163 coverage 173.9 normalized_cov 0.95 44086..45261 Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +sul2 100.00 816/816 100.0 1..816 CCI165_S85_contig_63 length 5843 coverage 215.4 normalized_cov 1.18 4351..5166 Sulfamethoxazole AY034138 +dfrA1 99.79 474/474 100.0 1..474 CCI165_S85_contig_74 length 3262 coverage 232.9 normalized_cov 1.28 1335..1808 Trimethoprim AF203818 +dfrA1 99.79 474/474 100.0 1..474 CCI165_S85_contig_74 length 3262 coverage 232.9 normalized_cov 1.28 1335..1808 Trimethoprim AJ238350 +dfrA1 99.79 474/474 100.0 1..474 
CCI165_S85_contig_74 length 3262 coverage 232.9 normalized_cov 1.28 1335..1808 Trimethoprim X00926 diff --git a/test/data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt b/test/data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt new file mode 100644 index 0000000..d55c916 --- /dev/null +++ b/test/data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt @@ -0,0 +1,14 @@ +Resistance gene Identity Alignment Length/Gene Length Coverage Position in reference Contig Position in contig Phenotype Accession no. +aph(6)-Id 100.00 837/837 100.0 1..838 NA NA..NA Streptomycin M28829 +aph(3'')-Ib 93.78 804/804 93.78 1..805 NA NA..NA Streptomycin AF321550 +aadA1 100.00 792/792 100.0 1..793 NA NA..NA Spectinomycin, Streptomycin JX185132 +aph(3'')-Ib 99.88 803/804 99.88 2..805 NA NA..NA Streptomycin AF024602 +aph(3'')-Ib 82.71 667/804 82.96 1..668 NA NA..NA Streptomycin AF321551 +blaOKP-B-17 99.88 861/861 100.0 1..862 NA NA..NA Unknown Beta-lactam AM850919 +fosA5 93.33 420/420 100.0 1..421 NA NA..NA Fosfomycin EU195449 +fosA 93.57 420/420 100.0 1..421 NA NA..NA Fosfomycin AFBO01000747 +fosA 93.81 420/420 100.0 1..421 NA NA..NA Fosfomycin ACZD01000244 +OqxA 93.62 1176/1176 100.0 1..1177 NA NA..NA Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +OqxB 95.40 3153/3153 100.0 1..3154 NA NA..NA Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +sul2 100.00 816/816 100.0 1..817 NA NA..NA Sulfamethoxazole AY034138 +dfrA1 96.41 458/474 96.62 1..459 NA NA..NA Trimethoprim EU089668 diff --git a/test/data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt b/test/data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt new file mode 100644 index 0000000..d55c916 --- /dev/null +++ b/test/data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt @@ -0,0 +1,14 @@ +Resistance gene Identity Alignment Length/Gene Length Coverage Position in reference Contig Position in contig Phenotype Accession no. 
+aph(6)-Id 100.00 837/837 100.0 1..838 NA NA..NA Streptomycin M28829 +aph(3'')-Ib 93.78 804/804 93.78 1..805 NA NA..NA Streptomycin AF321550 +aadA1 100.00 792/792 100.0 1..793 NA NA..NA Spectinomycin, Streptomycin JX185132 +aph(3'')-Ib 99.88 803/804 99.88 2..805 NA NA..NA Streptomycin AF024602 +aph(3'')-Ib 82.71 667/804 82.96 1..668 NA NA..NA Streptomycin AF321551 +blaOKP-B-17 99.88 861/861 100.0 1..862 NA NA..NA Unknown Beta-lactam AM850919 +fosA5 93.33 420/420 100.0 1..421 NA NA..NA Fosfomycin EU195449 +fosA 93.57 420/420 100.0 1..421 NA NA..NA Fosfomycin AFBO01000747 +fosA 93.81 420/420 100.0 1..421 NA NA..NA Fosfomycin ACZD01000244 +OqxA 93.62 1176/1176 100.0 1..1177 NA NA..NA Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +OqxB 95.40 3153/3153 100.0 1..3154 NA NA..NA Chloramphenicol, Nalidixic acid, Ciprofloxacin, Trimethoprim EU370913 +sul2 100.00 816/816 100.0 1..817 NA NA..NA Sulfamethoxazole AY034138 +dfrA1 96.41 458/474 96.62 1..459 NA NA..NA Trimethoprim EU089668 diff --git a/test/run_integration_test.sh b/test/run_integration_test.sh index 9d1fc80..6dff8e3 100644 --- a/test/run_integration_test.sh +++ b/test/run_integration_test.sh @@ -13,8 +13,8 @@ hamronize ariba data/raw_outputs/ariba/report.tsv --reference_database_version d hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format json --output hamronized_amrfinderplus_nt.json hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format tsv --output hamronized_amrfinderplus_nt.tsv -hamronize amrfinderplus --input_file_name amrfinderplus_portein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv 
--format json --output hamronized_amrfinderplus_aa.json -hamronize amrfinderplus --input_file_name amrfinderplus_portein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv +hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format json --output hamronized_amrfinderplus_aa.json +hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format json --output hamronized_rgi.json hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format tsv --output hamronized_rgi.tsv @@ -27,6 +27,15 @@ hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 -- hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/ResFinder_results_tab.txt --format json --output hamronized_resfinder.json hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/ResFinder_results_tab.txt --format tsv --output hamronized_resfinder.tsv +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt --format json --output 
hamronized_resfinder_inputfasta.json +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfasta_tab.txt --format tsv --output hamronized_resfinder_inputfasta.tsv + +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt --format json --output hamronized_resfinder_inputfastq.json +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfastq_tab.txt --format tsv --output hamronized_resfinder_inputfastq.tsv + +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt --format json --output hamronized_resfinder_inputfastq_inputfasta.json +hamronize resfinder --input_file_name resfinder_report --analysis_software_version resfinder_v4.5 --reference_database_version resfinder_db_v1 data/raw_outputs/resfinder/resfinder_inputfastq_inputfasta.txt --format tsv --output hamronized_resfinder_inputfastq_inputfasta.tsv + hamronize pointfinder --input_file_name pointfinder_report --analysis_software_version resfinder_v4 --reference_database_version pointfinder_db_v1 data/raw_outputs/pointfinder/PointFinder_results.txt --format json --output hamronized_pointfinder.json hamronize pointfinder --input_file_name pointfinder_report --analysis_software_version resfinder_v4 --reference_database_version pointfinder_db_v1 data/raw_outputs/pointfinder/PointFinder_results.txt --format tsv --output hamronized_pointfinder.tsv diff --git a/test/test_parsing_validity.py b/test/test_parsing_validity.py index b15aa85..9ad42db 100644 --- 
a/test/test_parsing_validity.py +++ b/test/test_parsing_validity.py @@ -292,7 +292,7 @@ def test_kmerresistance(): assert result.reference_gene_stop is None -def test_resfinder(): +def test_pointfinder(): metadata = { "analysis_software_version": "4.1.0", "reference_database_version": "2021-02-01", @@ -318,17 +318,32 @@ def test_resfinder(): assert result.nucleotide_mutation == "GGT -> GAT" assert result.amino_acid_mutation == "p.G81D" - -def test_pointfinder(): +def test_resfinder_fastq(): metadata = { - "analysis_software_version": "4.1.0", - "reference_database_version": "2019-Jul-28", + "analysis_software_version": "4.5.0", + "reference_database_version": "2021-02-01", "input_file_name": "Dummy", } parsed_report = hAMRonization.parse( - "data/dummy/resfinder/ResFinder_results_tab.txt", metadata, "resfinder" + "data/dummy/resfinder/resfinder_inputfastq_inputfasta.txt", metadata, "resfinder" ) + for result in parsed_report: + # assert mandatory fields + assert result.input_file_name == "Dummy" + assert result.gene_symbol == "aph(6)-Id" + assert result.gene_name == "aph(6)-Id" + assert result.reference_database_name == "resfinder" + assert result.reference_database_version == "2021-02-01" + assert result.reference_accession == "M28829" + assert result.analysis_software_name == "resfinder" + assert result.analysis_software_version == "4.5.0" + assert result.genetic_variation_type == "gene_presence_detected" + assert result.coverage_percentage == 100 + assert result.drug_class == "Streptomycin" + assert result.reference_gene_length == 837 + assert result.sequence_identity == 100 + def test_rgi_variants(): metadata = {