Skip to content

Commit

Permalink
Updated drug resistance marker code to account for het genotypes. (#…
Browse files Browse the repository at this point in the history
…467)

* Updated drug resistance marker code.

- Now `PfalciparumTypeDrugResistanceMarkers::CallDrugResistanceMutations` pulls info directly from
  the VCF file using pysam, rather than a series of shell commands.
- Now `PfalciparumTypeDrugResistanceMarkers::CallDrugResistanceMutations` preserves het/hom
  genotype information in the raw output file.
- `PfalciparumDrugResistanceSummary.wdl` now calls into
  `PfalciparumTypeDrugResistanceMarkers` for
  `CreateDrugResistanceSummary`.
- Minor refactoring of names.
* Added VCF Index as input to `PfalciparumTypeDrugResistanceMarkers.wdl`
* Fixed output names in `PfalciparumDrugResistanceSummary.wdl`.
  • Loading branch information
jonn-smith authored Aug 26, 2024
1 parent 424dc94 commit 52868a8
Show file tree
Hide file tree
Showing 2 changed files with 302 additions and 330 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
version 1.0

import "../../../tasks/Utility/Finalize.wdl" as FF
import "PfalciparumTypeDrugResistanceMarkers.wdl" as DRUG_RES

workflow PfalciparumDrugResistanceSummary {
meta {
desciption: "Create a drug resistance report based on the given raw drug resistance loci report."
}

input {
File raw_drug_resistance_report
File raw_drug_resistance_info

String participant_name

Expand All @@ -21,9 +22,9 @@ workflow PfalciparumDrugResistanceSummary {
gcs_out_root_dir: "GCS Bucket into which to finalize outputs. If no bucket is given, outputs will not be finalized and instead will remain in their native execution location."
}

call CreateDrugResistanceSummary as CreateDrugResistanceSummary {
call DRUG_RES.CreateDrugResistanceSummary as CreateDrugResistanceSummary {
input:
raw_drug_resistance_report = raw_drug_resistance_report,
raw_drug_resistance_info = raw_drug_resistance_info,
prefix = participant_name
}

Expand All @@ -36,263 +37,11 @@ workflow PfalciparumDrugResistanceSummary {
output {
File drug_resistance_summary = select_first([FinalizeDrugResistanceSummary.gcs_path, CreateDrugResistanceSummary.resistance_summary])

String drug_status_chloroquine = CreateDrugResistanceSummary.chloroquine_status
String drug_status_pyrimethamine = CreateDrugResistanceSummary.pyrimethamine_status
String drug_status_sulfadoxine = CreateDrugResistanceSummary.sulfadoxine_status
String drug_status_mefloquine = CreateDrugResistanceSummary.mefloquine_status
String drug_status_artemisinin = CreateDrugResistanceSummary.artemisinin_status
String drug_status_piperaquine = CreateDrugResistanceSummary.piperaquine_status
}
}

task CreateDrugResistanceSummary {
meta {
desciption: "Create a drug resistance report based on the given raw drug resistance loci report."
}

input {
File raw_drug_resistance_report
String prefix
RuntimeAttr? runtime_attr_override
}
String outfile_name = "~{prefix}.drug_resistance_summary.txt"
Int disk_size = 1 + 4*ceil(size(raw_drug_resistance_report, "GB"))
command <<<
python3 <<CODE
import re
from enum import Enum
# Set up some functions to determine drug sensitivity:
pchange_regex = re.compile(r"""p\.([A-z]+)([0-9]+)([A-z]+)""")
AA_3_2 = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K', 'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N', 'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W', 'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}
DrugSensitivity = Enum("DrugSensitivity", ["UNDETERMINED", "SENSITIVE", "RESISTANT"])
def parse_pchange(pchange):
old, pos, new = pchange_regex.match(pchange).groups()
return AA_3_2[old.upper()], int(pos), AA_3_2[new.upper()]
def get_chloroquine_sensitivity(dr_report):
# Locus utilized: PF3D7_0709000 (crt)
# Codon: 76
# Workflow:
# Step Genetic change Interpretation Classification
# 1 76 K/T heterozygote Heterozygous mutant Undetermined
# 2 76 missing Missing Undetermined
# 3 K76 Wild type Sensitive
# 4 76T Mutant Resistant
# 5 otherwise Unknown mutant Undetermined
with open(dr_report, 'r') as f:
for line in f:
# We only care about this locus for this drug:
if line.startswith("pfcrt PF3D7_0709000"):
gene, locus, pchange, marker = line.strip().split(" ")
old, pos, new = parse_pchange(pchange)
if pos == 76:
if (old == "L") and (new == "T") and (marker == "absent"):
return DrugSensitivity.SENSITIVE
elif (new == "T") and (marker == "present"):
return DrugSensitivity.RESISTANT
return DrugSensitivity.UNDETERMINED
def get_pyrimethamine_sensitivity(dr_report):
# Locus utilized: PF3D7_0417200 (dhfr)
# Codon: 108
# Workflow:
# Step Genetic change Interpretation Classification
# 1 108 S/N heterozygote Heterozygous mutant Undetermined
# 2 108 missing Missing Undetermined
# 3 S108 Wild type Sensitive
# 4 108N Mutant Resistant
# 5 otherwise Unknown mutant Undetermined
with open(dr_report, 'r') as f:
for line in f:
# We only care about this locus for this drug:
if line.startswith("pfdhfr PF3D7_0417200"):
gene, locus, pchange, marker = line.strip().split(" ")
old, pos, new = parse_pchange(pchange)
if pos == 108:
if (old == "S") and (new == "N") and (marker == "absent"):
return DrugSensitivity.SENSITIVE
elif (old == "S") and (new == "N") and (marker == "present"):
return DrugSensitivity.RESISTANT
elif (new == "N") and (marker == "present"):
return DrugSensitivity.RESISTANT
elif marker == "absent":
return DrugSensitivity.SENSITIVE
return DrugSensitivity.UNDETERMINED
def get_sulfadoxine_sensitivity(dr_report):
# Locus utilized: PF3D7_0810800 (dhps)
# Codon: 437
# Workflow:
# Step Genetic change Interpretation Classification
# 1 437 A/G heterozygote Heterozygous mutant Undetermined
# 2 437 missing Missing Undetermined
# 3 A437 Wild type Sensitive
# 4 437G Mutant Resistant
# 5 otherwise Unknown mutant Undetermined
with open(dr_report, 'r') as f:
for line in f:
# We only care about this locus for this drug:
if line.startswith("pfdhps PF3D7_0810800"):
gene, locus, pchange, marker = line.strip().split(" ")
old, pos, new = parse_pchange(pchange)
if pos == 437:
if (old == "A") and (new == "G") and (marker == "absent"):
return DrugSensitivity.SENSITIVE
elif (old == "A") and (new == "G") and (marker == "present"):
return DrugSensitivity.RESISTANT
elif (new == "G") and (marker == "present"):
return DrugSensitivity.RESISTANT
elif marker == "absent":
return DrugSensitivity.SENSITIVE
return DrugSensitivity.UNDETERMINED
def get_mefloquine_sensitivity(dr_report):
# Locus utilized: PF3D7_0523000 (mdr1)
# Codons: Amplification status of whole gene
# Workflow:
# Step Genetic change Interpretation Classification
# 1 Missing Missing Undetermined
# 2 Heterozygous duplication Heterozygous mutant Undetermined
# 3 Single copy Wild type Sensitive
# 4 Multiple copies Mutant Resistant
# Currently we can't determine this.
# We need to get CNV calling working first.
return DrugSensitivity.UNDETERMINED
def get_artemisinin_sensitivity(dr_report):
# Locus utilized: PF3D7_1343700 (kelch13)
# Codons: 349-726 (BTB/POZ and propeller domains)
# Workflow:
# Step Genetic change Interpretation Classification
# 1 Homozygous non-synonymous mutations in the kelch13 BTB/POZ and propeller
# domain classified by the World Health Organisation as associated with delayed
# parasite clearance
# Mutant – associated with delayed clearance Resistant
# 2 Heterozygous non-synonymous mutations in the kelch13 BTB/POZ and
# propeller domain classified by the World Health Organisation as associated
# with delayed parasite clearance
# Mutant - heterozygous Undetermined
# 3 578S as homozygous Mutant - not associated Sensitive
# 4 Any missing call in amino acids 349-726 Missing Undetermined
# 5 No non-reference calls in amino acids 349-726 Wild-type Sensitive
# 6 otherwise Mutant - not in WHO list Undetermined
with open(dr_report, 'r') as f:
for line in f:
# We only care about this locus for this drug:
if line.startswith("pfkelch13 PF3D7_1343700"):
gene, locus, pchange, marker = line.strip().split(" ")
old, pos, new = parse_pchange(pchange)
has_non_ref = False
has_variants = False
if 349 <= pos <= 726:
if (old != new) and (marker == "present"):
return DrugSensitivity.RESISTANT
elif (new == "S") and (marker == "present"):
return DrugSensitivity.SENSITIVE
elif marker == "present":
has_non_ref = True
has_variants = True
if (has_variants) and (not has_non_ref):
return DrugSensitivity.SENSITIVE
return DrugSensitivity.UNDETERMINED
def get_piperaquine_sensitivity(dr_report):
# Loci utilized: PF3D7_1408000 (plasmepsin 2) and PF3D7_1408100 (plasmepsin 3)
# Codons: Amplification status of both genes
# Workflow:
# Step Genetic change Interpretation Classification
# 1 Missing Missing Undetermined
# 2 Heterozygous duplication Heterozygous mutant Undetermined
# 3 Single copy Wild type Sensitive
# 4 Multiple copies Mutant Resistant
# Currently we can't determine this.
# We need to get CNV calling working first.
return DrugSensitivity.UNDETERMINED
# Get the drug resistances:
chloroquine = get_chloroquine_sensitivity("~{raw_drug_resistance_report}")
pyrimethamine = get_pyrimethamine_sensitivity("~{raw_drug_resistance_report}")
sulfadoxine = get_sulfadoxine_sensitivity("~{raw_drug_resistance_report}")
mefloquine = get_mefloquine_sensitivity("~{raw_drug_resistance_report}")
artemisinin = get_artemisinin_sensitivity("~{raw_drug_resistance_report}")
piperaquine = get_piperaquine_sensitivity("~{raw_drug_resistance_report}")
with open("~{outfile_name}", 'w') as f:
f.write(f"#~{prefix} Drug Resistances:\n")
f.write(f"Chloroquine: {chloroquine.name}\n")
f.write(f"Pyrimethamine: {pyrimethamine.name}\n")
f.write(f"Sulfadoxine: {sulfadoxine.name}\n")
f.write(f"Mefloquine: {mefloquine.name}\n")
f.write(f"Artemisinin: {artemisinin.name}\n")
f.write(f"Piperaquine: {piperaquine.name}\n")
f.write("\n")
# Write our resistance status for each drug:
with open("chloroquine_status.txt", 'w') as f:
f.write(f"{chnoroquine.name}\n")
with open("pyrimethamine_status.txt", 'w') as f:
f.write(f"{pyrimethamine.name}\n")
with open("sulfadoxine_status.txt", 'w') as f:
f.write(f"{sulfadoxine.name}\n")
with open("mefloquine_status.txt", 'w') as f:
f.write(f"{mefloquine.name}\n")
with open("artemisinin_status.txt", 'w') as f:
f.write(f"{artemisinin.name}\n")
with open("piperaquine_status.txt", 'w') as f:
f.write(f"{piperaquine.name}\n")
CODE
>>>
output {
File resistance_summary = "~{outfile_name}"

String chloroquine_status = read_string("chloroquine_status.txt")
String pyrimethamine_status = read_string("pyrimethamine_status.txt")
String sulfadoxine_status = read_string("sulfadoxine_status.txt")
String mefloquine_status = read_string("mefloquine_status.txt")
String artemisinin_status = read_string("artemisinin_status.txt")
String piperaquine_status = read_string("piperaquine_status.txt")
}

#########################
RuntimeAttr default_attr = object {
cpu_cores: 1,
mem_gb: 1,
disk_gb: disk_size,
boot_disk_gb: 10,
preemptible_tries: 1,
max_retries: 1,
docker: "us.gcr.io/broad-dsp-lrma/lr-utils:0.1.8"
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
runtime {
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
docker: select_first([runtime_attr.docker, default_attr.docker])
String predicted_chloroquine_status = CreateDrugResistanceSummary.predicted_chloroquine_status
String predicted_pyrimethamine_status = CreateDrugResistanceSummary.predicted_pyrimethamine_status
String predicted_sulfadoxine_status = CreateDrugResistanceSummary.predicted_sulfadoxine_status
String predicted_mefloquine_status = CreateDrugResistanceSummary.predicted_mefloquine_status
String predicted_artemisinin_status = CreateDrugResistanceSummary.predicted_artemisinin_status
String predicted_piperaquine_status = CreateDrugResistanceSummary.predicted_piperaquine_status
}
}
Loading

0 comments on commit 52868a8

Please sign in to comment.