diff --git a/Snakefile b/Snakefile index e45437c..8353dbc 100644 --- a/Snakefile +++ b/Snakefile @@ -107,8 +107,8 @@ minCoverage = leafcutterOpt["minCoverage"] isChimera = "hpc.mssm.edu" in socket.getfqdn() # not sure if this works when running in serial on interactive node -if isChimera: - shell.prefix('export PS1="";source activate snakemake;ml R/3.6.0;') +#if isChimera: +# shell.prefix('export PS1="";source activate snakemake;ml R/3.6.0;') #else: #shell.prefix('conda activate leafcutterpipeline;') @@ -156,11 +156,11 @@ rule extractJunctions: output: 'junctions/{samples}' + juncSuffix shell: - #"samtools index {input};" redundant if indexes are present - #"regtools junctions extract -a 8 -m 50 -M 500000 -s {stranded} -o {output} {input}" + "samtools index {input};" # redundant if indexes are present + "regtools junctions extract -a 8 -i 50 -I 500000 -s {stranded} -o {output} {input.bam}" # conda version of regtools uses i and I instead of m and M - "ml regtools/0.5.1; " - "regtools junctions extract -a 8 -m 50 -M 500000 -s {stranded} -o {output} {input.bam}" + # "ml regtools/0.5.1; " + # "regtools junctions extract -a 8 -m 50 -M 500000 -s {stranded} -o {output} {input.bam}" # remove weird contigs that cause add_chr() to break by adding "chr" to normal chr names @@ -246,7 +246,7 @@ rule junctionQC: params: script = "scripts/cluster_QC.R" shell: - "ml R/3.6.0; " + #"ml R/3.6.0; " "Rscript {params.script} " "--outFolder {outFolder} " "--dataCode {dataCode} " @@ -284,7 +284,7 @@ rule leafcutterDS: params: n_threads = leafcutterOpt['n_threads'] shell: - 'ml R/3.6.0; ' + #'ml R/3.6.0; ' 'Rscript {leafcutterPath}/scripts/leafcutter_ds.R ' ' --output_prefix {outFolder}{wildcards.contrast}/{dataCode}_{wildcards.contrast} ' ' --num_threads {params.n_threads} ' @@ -313,7 +313,7 @@ rule getTerminalExons: input: refFolder + refFile params: - gtftogenepred= "/sc/arion/projects/ad-omics/data/software/UCSC/gtfToGenePred", + gtftogenepred= "gtfToGenePred", genepredtobed = "scripts/genepred_to_bed.py", get_regions = "scripts/create_regions_from_gencode.R", outFolder = refFolder + refCode + "/" @@ -326,7 +326,7 @@ rule getTerminalExons: "{params.gtftogenepred} {input} {output.genepred};" "python {params.genepredtobed} --first_exon {output.genepred} > {output.starts} ; " "python {params.genepredtobed} --last_exon {output.genepred} > {output.ends}; " - "mkdir {params.outFolder}; " + "mkdir -p {params.outFolder}; " "Rscript {params.get_regions} {input} {params.outFolder}" # prepare results for shiny visualisation diff --git a/example/config.yaml b/example/config.yaml index 2e7c90c..6fc0077 100644 --- a/example/config.yaml +++ b/example/config.yaml @@ -36,9 +36,11 @@ stranded: 0 # cluster using regtools junctions or STAR-like junctions # regtools or rapid + clusterRegtools: True +junctionQC: False -leafcutterPath: '/hpc/users/humphj04/software/leafcutter' +leafcutterPath: '/app/leafcutter' leafcutter: # clustering options @@ -58,7 +60,7 @@ leafcutter: python3Path: 'python3' python2Path: 'python2' -refFolder: "/sc/arion/projects/ad-omics/data/references/hg38_reference/GENCODE/" -refFile: "gencode.v30.annotation.gtf.gz" -refCode: "gencode_hg38_v30" +refFolder: "/app/reference/" +refFile: "gencode.v38.annotation.gtf.gz" +refCode: "gencode_hg38_v38" diff --git a/example/data/TDP43_knockdown_1_unique_kcnq2.bam b/example/data/TDP43_knockdown_1_unique_kcnq2.bam index b087217..4df0a3f 100644 Binary files a/example/data/TDP43_knockdown_1_unique_kcnq2.bam and b/example/data/TDP43_knockdown_1_unique_kcnq2.bam differ diff --git a/example/data/TDP43_knockdown_2_unique_kcnq2.bam b/example/data/TDP43_knockdown_2_unique_kcnq2.bam index f318782..03e9cc1 100644 Binary files a/example/data/TDP43_knockdown_2_unique_kcnq2.bam and b/example/data/TDP43_knockdown_2_unique_kcnq2.bam differ diff --git a/example/data/TDP43_knockdown_3_unique_kcnq2.bam b/example/data/TDP43_knockdown_3_unique_kcnq2.bam index 7528274..f5c4f02 100644 Binary files a/example/data/TDP43_knockdown_3_unique_kcnq2.bam and b/example/data/TDP43_knockdown_3_unique_kcnq2.bam differ diff --git a/example/data/control_scramble_1_unique_kcnq2.bam b/example/data/control_scramble_1_unique_kcnq2.bam index 6f90c44..444c231 100644 Binary files a/example/data/control_scramble_1_unique_kcnq2.bam and b/example/data/control_scramble_1_unique_kcnq2.bam differ diff --git a/example/data/control_scramble_2_unique_kcnq2.bam b/example/data/control_scramble_2_unique_kcnq2.bam index 33601e7..ff6bfa6 100644 Binary files a/example/data/control_scramble_2_unique_kcnq2.bam and b/example/data/control_scramble_2_unique_kcnq2.bam differ diff --git a/example/data/control_scramble_3_unique_kcnq2.bam b/example/data/control_scramble_3_unique_kcnq2.bam index 9e778d4..27a97d6 100644 Binary files a/example/data/control_scramble_3_unique_kcnq2.bam and b/example/data/control_scramble_3_unique_kcnq2.bam differ diff --git a/example/run_snakemake.sh b/example/run_snakemake.sh index 3ac94db..b978b86 100644 --- a/example/run_snakemake.sh +++ b/example/run_snakemake.sh @@ -3,4 +3,4 @@ #ml R # this version of R has the current version of leafcutter -snakemake -s ../Snakefile --configfile config.yaml +snakemake -c1 -s ../Snakefile --configfile config.yaml