Skip to content

Commit

Permalink
changes on giab benchmarking
Browse files Browse the repository at this point in the history
Former-commit-id: 5fb9639
  • Loading branch information
elowy01 committed Feb 20, 2019
1 parent 20aa286 commit 524cca8
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 13 deletions.
14 changes: 7 additions & 7 deletions scripts/VCF/FILTER/MLfilter_applymodel.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ if (params.help) {
log.info ' --annotations ANNOTATION_STRING String containing the annotations to filter, for example:'
log.info ' %CHROM\t%POS\t%INFO/DP\t%INFO/RPB\t%INFO/MQB\t%INFO/BQB\t%INFO/MQSB\t%INFO/SGB\t%INFO/MQ0F\t%INFO/ICB\t%INFO/HOB\t%INFO/MQ\n.'
log.info ' --vt VARIANT_TYPE Type of variant to filter. Possible values are 'snps'/'indels'.'
log.info ' --split_multiallelics true/false If true then split the multiallelic positions. Default=false.'
log.info ' --threads INT Number of threads used in the different BCFTools processes. Default=1.'
log.info ''
exit 1
Expand All @@ -45,8 +44,7 @@ chrChannel=Channel.from( chrList )

//Apply a fitted model obtained after running MLfilter_trainmodel.nf

if (params.split_multiallelics==true) {
process split_multiallelic {
process split_multiallelic {
/*
This process splits multiallelic sites into separate lines, one per allele, using bcftools norm.
Expand All @@ -62,15 +60,15 @@ if (params.split_multiallelics==true) {

output:
file 'out.splitted.vcf.gz' into splitted_vcf
file 'out.splitted.vcf.gz.tbi' into splitted_vcf_tbi
val chr into chr1

"""
bcftools norm -r ${chr} -m -${params.vt} ${params.vcf} -o out.splitted.vcf.gz -Oz --threads ${params.threads}
tabix out.splitted.vcf.gz
"""
}
}


process get_variant_annotations {
/*
Process to get the variant annotations for the selected ${params.vt} from the unfiltered VCF file
Expand All @@ -83,13 +81,15 @@ process get_variant_annotations {

input:
val chr from chr1
file splitted_vcf
file splitted_vcf_tbi

output:
file 'unfilt_annotations.vt.tsv.gz' into unfilt_annotations
val chr into chr2

"""
bcftools view -c1 -r ${chr} -v ${params.vt} ${params.vcf} -o out.onlyvariants.vt.vcf.gz -Oz --threads ${params.threads}
bcftools view -c1 -r ${chr} -v ${params.vt} ${splitted_vcf} -o out.onlyvariants.vt.vcf.gz -Oz --threads ${params.threads}
tabix out.onlyvariants.vt.vcf.gz
bcftools query -H -r ${chr} -f '${params.annotations}' out.onlyvariants.vt.vcf.gz | bgzip -c > unfilt_annotations.vt.tsv.gz
"""
Expand All @@ -101,7 +101,7 @@ process apply_model {
and to apply this model on the unfiltered VCF
*/

memory '2 GB'
memory '5 GB'
executor 'lsf'
queue "${params.queue}"
cpus 1
Expand Down
2 changes: 1 addition & 1 deletion scripts/VCF/QC/compare_with_giab.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
params.bcftools_folder='/homes/ernesto/bin/bcftools-1.6/' // folder containing the bcftools binary
params.bcftools_folder='/nfs/production/reseq-info/work/bin/bcftools-1.9/' // folder containing the bcftools binary
params.tabix='/nfs/production/reseq-info/work/ernesto/bin/anaconda3/bin/tabix' // path to tabix binary
params.bgzip='/nfs/production/reseq-info/work/ernesto/bin/anaconda3/bin/bgzip' // path to bgzip
params.non_valid_regions='/nfs/production/reseq-info/work/ernesto/isgr/SUPPORTING/REFERENCE/exclude_nonvalid.bed' // path to BED format file containing the regions that will be excluded from the comparison
Expand Down
2 changes: 1 addition & 1 deletion scripts/VCF/QC/compare_with_giab.nf
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ process excludeNonVariants {
file 'out.sites.vcf.gz' into out_sites_vcf

"""
${params.bcftools_folder}/bcftools view -G -m2 -M2 -c1 ${params.vcf} -f.,PASS -r ${params.chros} -o out.sites.vcf.gz -Oz
${params.bcftools_folder}/bcftools view -m2 -M2 -c1 ${params.vcf} -f.,PASS -r ${params.chros} -o out.sites.vcf.gz -Oz
${params.tabix} out.sites.vcf.gz
"""
}
Expand Down
4 changes: 1 addition & 3 deletions scripts/VCF/UTILS/split_giab_into_chros.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
params.bcftools_folder='/homes/ernesto/bin/bcftools-1.6/' // folder containing the bcftools binary
params.tabix_folder='/nfs/production/reseq-info/work/ernesto/bin/anaconda3/bin/' // folder containing the tabix binary
params.giab='/nfs/production/reseq-info/work/ernesto/isgr/SUPPORTING/REFERENCE/GIAB/ANALYSIS_READY/CHR20/NA12878.giab.SNP.chr20.non_valid.reheaded.vcf.gz'
params.queue='production-rh7' // queue name used by LSF
params.queue='production-rh74' // queue name used by LSF
2 changes: 1 addition & 1 deletion scripts/VCF/UTILS/split_giab_into_chros.nf
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ process splitVCF {
file "${params.prefix}.${chr}.biallelic.vcf.gz*" into chr_vcf

"""
${params.bcftools_folder}/bcftools view -m2 -M2 -r ${chr} ${params.giab_vcf} -o ${params.prefix}.${chr}.biallelic.vcf.gz -O z
bcftools view -m2 -M2 -r ${chr} ${params.giab_vcf} -o ${params.prefix}.${chr}.biallelic.vcf.gz -O z
${params.tabix_folder}/tabix ${params.prefix}.${chr}.biallelic.vcf.gz
"""
}

0 comments on commit 524cca8

Please sign in to comment.