# Patch summary
#   * analysis.yaml: point exclusion_list at a chr22-only BED file (added
#     below as data/ENCFF001TDO_chr22.bed).
#   * rules/gridss.smk: replace the `gridss gridss.CallVariants KEY=VALUE`
#     invocation with the `gridss` wrapper options (-t/-r/-o/-b/-a/-w plus
#     positional BAM files) in both the paired and single-sample rules.
#
# Review amendments relative to the submitted patch:
#   * excl_opt now carries the complete "-b <bed>" option (or an empty
#     string), mirroring the former "BLACKLIST=<bed>" pattern. Emitting
#     `-b {params.excl_opt}` unconditionally left a bare `-b` directly in
#     front of `-a ...` whenever exclude_regions is 0.
#   * gridss_p passes the normal BAM before the tumor BAM again; the
#     bcftools step that follows states that the 'normal' sample is
#     assumed to be at index 0.
#
# NOTE(review): this patch was recovered from whitespace-collapsed text.
# Leading indentation, blank context lines and the BED column separators
# were reconstructed -- verify against the target files, or apply with
# `git apply --ignore-whitespace`. The blob ids on the `index` lines are
# those of the submitted patch and were not recomputed.
diff --git a/snakemake/analysis.yaml b/snakemake/analysis.yaml
index 39bc3e55f..6ca224024 100644
--- a/snakemake/analysis.yaml
+++ b/snakemake/analysis.yaml
@@ -15,7 +15,7 @@ mode: p
 genome: data/fasta/chr22.fasta
 
 # filepath of the exclusion list in BED format
-exclusion_list: data/ENCFF001TDO.bed
+exclusion_list: data/ENCFF001TDO_chr22.bed
 exclude_regions: 1 # use the list (1) or don't (0)
 
 # file extensions used by the workflow
diff --git a/snakemake/data/ENCFF001TDO_chr22.bed b/snakemake/data/ENCFF001TDO_chr22.bed
new file mode 100644
index 000000000..6dc1b9fe6
--- /dev/null
+++ b/snakemake/data/ENCFF001TDO_chr22.bed
@@ -0,0 +1,2 @@
+chr22	16847814	16862659	Satellite_repeat	1000	.
+chr22	18876789	18884510	Satellite_repeat	1000	.
diff --git a/snakemake/rules/gridss.smk b/snakemake/rules/gridss.smk
index e84161c5c..705575bc4 100644
--- a/snakemake/rules/gridss.smk
+++ b/snakemake/rules/gridss.smk
@@ -7,7 +7,7 @@ rule gridss_p: # paired-samples analysis
         normal_bam = get_bam("{path}/{normal}"),
         normal_bai = get_bai("{path}/{normal}")
     params:
-        excl_opt = "BLACKLIST=" + get_bed() if exclude_regions() else ""
+        excl_opt = "-b " + get_bed() if exclude_regions() else ""
     output:
         os.path.join("{path}/{tumor}--{normal}", get_outdir("gridss"),
                      "gridss" + get_filext("vcf"))
@@ -43,16 +43,15 @@ rule gridss_p: # paired-samples analysis
         else
             # clean-up outdir prior to SV calling
             rm -fr ${{OUTDIR}}/*gridss* &&
-            gridss gridss.CallVariants \
-                WORKER_THREADS={threads} \
-                REFERENCE_SEQUENCE="{input.fasta}" \
-                {params.excl_opt} \
-                INPUT="{input.normal_bam}" \
-                INPUT="{input.tumor_bam}" \
-                OUTPUT="${{OUTFILE}}" \
-                ASSEMBLY="${{OUTDIR}}/gridss_assembly.bam" \
-                WORKING_DIR="${{TMP}}" \
-                TMP_DIR="${{TMP}}/gridss.${{RANDOM}}" &&
+            gridss \
+                -t {threads} \
+                -r "{input.fasta}" \
+                -o "${{OUTFILE}}" \
+                {params.excl_opt} \
+                -a "${{OUTDIR}}/gridss_assembly.bam" \
+                -w "${{TMP}}" \
+                "{input.normal_bam}" \
+                "{input.tumor_bam}" &&
             # somatic + SV quality filtering
             #   'normal' sample assumes index 0
             bcftools filter \
@@ -70,7 +69,7 @@ rule gridss_s: # single-sample analysis
         bam = get_bam("{path}/{sample}"),
         bai = get_bai("{path}/{sample}")
     params:
-        excl_opt = "BLACKLIST=" + get_bed() if exclude_regions() else ""
+        excl_opt = "-b " + get_bed() if exclude_regions() else ""
     output:
         os.path.join("{path}/{sample}", get_outdir("gridss"),
                      "gridss" + get_filext("vcf"))
@@ -106,16 +105,15 @@ rule gridss_s: # single-sample analysis
         else
             # clean-up outdir prior to SV calling
             rm -fr ${{OUTDIR}}/*gridss* &&
-            gridss gridss.CallVariants \
-                WORKER_THREADS={threads} \
-                REFERENCE_SEQUENCE="{input.fasta}" \
-                {params.excl_opt} \
-                INPUT="{input.bam}" \
-                OUTPUT="${{OUTFILE}}" \
-                ASSEMBLY="${{OUTDIR}}/gridss_assembly.bam" \
-                WORKING_DIR="${{TMP}}" \
-                TMP_DIR="${{TMP}}/gridss.${{RANDOM}}" &&
-                # SV quality filtering
+            gridss \
+                -t {threads} \
+                -r "{input.fasta}" \
+                -o "${{OUTFILE}}" \
+                {params.excl_opt} \
+                -a "${{OUTDIR}}/gridss_assembly.bam" \
+                -w "${{TMP}}" \
+                "{input.bam}" &&
+            # SV quality filtering
             bcftools filter \
                 -O v `# uncompressed VCF format` \
                 -o "{output}" \