Skip to content

Commit

Permalink
Merge pull request #8 from IARCbioinfo/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
nalcala authored Oct 27, 2020
2 parents a07e9e9 + 5ec44fb commit 3e074c7
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 66 deletions.
19 changes: 11 additions & 8 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ jobs:
# - run: cd ~ ; VERSION=3.6.0; wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz; tar -xzf singularity-${VERSION}.tar.gz; cd singularity; ./mconfig; make -C builddir; sudo make -C builddir install
# - run: cd ~ ; singularity pull ...
- run: cd ~ && git clone https://github.com/iarcbioinfo/data_test.git
- run: cd ; nextflow run ~/project/ --help
- run: cd ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --input_folder ~/data_test/BAM/ --mode germline --output_folder strelka2-germline-results --ref ~/data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --input_folder ~/data_test/BAM/ --mode germline --output_folder strelka2-germline-exome-results --ref ~/data_test/REF/17.fasta --exome --cpu 2 --mem 4
- run: cd ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --input_folder ~/data_test/BAM/ --mode germline --output_folder strelka2-germline-rna-results --ref ~/data_test/REF/17.fasta --rna --cpu 2 --mem 4
- run: cd ; echo -e 'tumor\tnormal\ndata_test/BAM/test/NA06984_T.bam\tdata_test/BAM/test/NA06984_N.bam' > input.tsv ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --tn_pairs input.tsv --mode somatic --output_folder strelka2-somatic-results --ref ~/data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --tn_pairs input.tsv --mode somatic --output_folder strelka2-somatic-callable-exome-results --ref ~/data_test/REF/17.fasta --AF --exome --outputCallableRegions --cpu 2 --mem 4 -with-dag dag.html
- run: cd ; nextflow run ~/project/ -with-docker iarcbioinfo/strelka2-nf --tn_pairs input.tsv --mode somatic --output_folder strelka2-somatic-callable-exome-results --ref ~/data_test/REF/17.fasta --AF --exome --outputCallableRegions --cpu 2 --mem 4 -with-dag dag.png -resume
- run: cd ; cp ~/dag.* ~/project/.
- run: cd ; nextflow run project/ --help
- run: cd ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_folder data_test/BAM/ --mode germline --output_folder strelka2-germline-results --ref data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; echo -e 'sample\tbam\nNA06984\tdata_test/BAM/test/NA06984_N.bam' > input_germline.tsv ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input_germline.tsv --mode germline --output_folder strelka2-germline-results-input-file --ref data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; echo -e 'sample\tbam\tvcf\nNA06984\tdata_test/BAM/test/NA06984_N.bam\tstrelka2-germline-results/VCFs/filtered/NA06984_N.germline_PASS.vcf.gz' > input_germline_GT.tsv ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input_germline_GT.tsv --mode germline --output_folder strelka2-germline-results-input-file-GT --ref data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_folder data_test/BAM/ --mode germline --output_folder strelka2-germline-exome-results --ref data_test/REF/17.fasta --exome --cpu 2 --mem 4
- run: cd ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_folder data_test/BAM/ --mode germline --output_folder strelka2-germline-rna-results --ref data_test/REF/17.fasta --rna --cpu 2 --mem 4
- run: cd ; echo -e 'tumor\tnormal\ndata_test/BAM/test/NA06984_T.bam\tdata_test/BAM/test/NA06984_N.bam' > input.tsv ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input.tsv --mode somatic --output_folder strelka2-somatic-results --ref data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; echo -e 'tumor\tnormal\tvcf\ndata_test/BAM/test/NA06984_T.bam\tdata_test/BAM/test/NA06984_N.bam\tstrelka2-somatic-results/VCFs/filtered/NA06984_T.somatic_PASS.vcf.gz' > input_GT.tsv ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input_GT.tsv --mode somatic --output_folder strelka2-somatic-results-GT --ref data_test/REF/17.fasta --cpu 2 --mem 4
- run: cd ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input.tsv --mode somatic --output_folder strelka2-somatic-callable-exome-results --ref data_test/REF/17.fasta --AF --exome --outputCallableRegions --cpu 2 --mem 4 -with-dag dag.html
- run: cd ; nextflow run project/ -with-docker iarcbioinfo/strelka2-nf --input_file input.tsv --mode somatic --output_folder strelka2-somatic-callable-exome-results --ref data_test/REF/17.fasta --AF --exome --outputCallableRegions --cpu 2 --mem 4 -with-dag dag.png -resume
- run: cd ; cp ~/dag.* project/.
- add_ssh_keys:
fingerprints:
- "ee:96:12:b3:06:55:ca:68:07:20:cc:77:75:62:04:f1"
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ FROM continuumio/miniconda3:4.7.12
LABEL base_image="continuumio/miniconda3"
LABEL version="4.7.12"
LABEL software="strelka2-nf"
LABEL software.version="1.0"
LABEL software.version="1.2"
LABEL about.summary="Container image containing all requirements for strelka2-nf"
LABEL about.home="http://github.com/IARCbioinfo/strelka2-nf"
LABEL about.documentation="http://github.com/IARCbioinfo/strelka2-nf/README.md"
Expand Down
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
| Type | Description |
|-----------|---------------|
| --input_folder | folder with bam files |
|--tn_pairs | Tab delimited text file with two columns called normal and tumor (for somatic mode only) |
|--input_file | Tab-delimited text file with either two columns called normal and tumor (somatic mode) or one column called bam (germline mode). Optionally, a column called sample can be provided, containing the sample names used to name the output files; for genotyping (see the genotyping section below), a column called vcf must also be provided |

Note: the file provided to --tn_pairs is where you can define pairs of bam to analyse with strelka. It's a tabular file with 2 columns normal and tumor.
Note: the file provided to --input_file is where you define the pairs of BAM files to analyse with strelka in somatic mode. It is a tab-delimited file with 2 columns, normal and tumor.

| normal | tumor |
| ----------- | ---------- |
Expand Down Expand Up @@ -58,14 +58,17 @@ Flags are special parameters without value.

## Usage

mode somatic
`nextflow run iarcbioinfo/strelka2.nf r v1.1 -profile singularity --mode somatic --ref hg38.fa --tn_pairs pairs.txt --input_folder path/to/bam/ --strelka path/to/strelka/`

mode germline
`nextflow run iarcbioinfo/strelka2.nf r v1.1 -profile singularity --mode germline --ref hg38.fa --input_folder path/to/bam/ --strelka path/to/strelka/`
### mode somatic
`nextflow run iarcbioinfo/strelka2-nf -r v1.2 -profile singularity --mode somatic --ref hg38.fa --input_file pairs.txt --input_folder path/to/bam/ --strelka path/to/strelka/`

To run the pipeline without singularity, just remove "-profile singularity". Alternatively, one can run the pipeline using a docker container (-profile docker) or the conda recipe containing all required dependencies (-profile conda).

### mode germline
`nextflow run iarcbioinfo/strelka2-nf -r v1.2 -profile singularity --mode germline --ref hg38.fa --input_folder path/to/bam/ --strelka path/to/strelka/`

### genotyping
When using --input_file, if a vcf column is provided that gives, for each sample, the path to a VCF file containing a list of somatic variants, the pipeline will use the --forcedGT option from strelka to genotype these positions, and will compute a bed file for these positions so that only variants from the VCF are genotyped. Note that genotyping can be performed both in somatic mode (in which case tumor/normal pairs must be provided) and in germline mode (in which case a single bam file must be provided).

## Output
| Type | Description |
|-----------|---------------|
Expand Down
2 changes: 2 additions & 0 deletions bin/getAllelicFraction
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ dict<-list( "A"=6, "C"=7, "G"=8, "T"=9 )
snvs<-snvs[snvs$FILTER=="PASS",]

system(paste0("zcat ",infile," | sed '/#CHROM/Q' > ",infile,".header"))
# add info for COV and AF
system(paste0("echo \"##FORMAT=<ID=COV,Number=1,Type=Integer,Description=\\\"Coverage\\\">\n##FORMAT=<ID=AF,Number=1,Type=Float,Description=\\\"Allelic fraction\\\">\" >> ",infile,".header"))
if(nrow(snvs)==0){
write.table(snvs, file=paste0( substr( infile, 1, nchar(infile)-7), ".vcf.body"), sep="\t", row.names=F, quote=F)
system(paste0("cat ",infile,".header ",substr( infile, 1, nchar(infile)-7), ".vcf.body > ",substr( infile, 1, nchar(infile)-7), ".vcf"))
Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ dependencies:
- strelka=2.9.10
- r-data.table =1.12.8
- r-tidyr=1.1.1
- bcftools=1.10.2
- bcftools=1.10.2
- bedops=2.4.39
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ profiles {
conda.createTimeout = "200 min" }
docker {
docker.enabled = true
process.container = 'iarcbioinfo/strelka2-nf:v1.1'
process.container = 'iarcbioinfo/strelka2-nf:v1.2'
}
singularity {
singularity.enabled = true
process.container = 'shub://IARCbioinfo/strelka2-nf:v1.1'
process.container = 'shub://IARCbioinfo/strelka2-nf:v1.2'
pullTimeout = "200 min"
}
}
Expand Down
Loading

0 comments on commit 3e074c7

Please sign in to comment.