forked from hassanfa/autoseq-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_msings.sh
executable file
·122 lines (93 loc) · 3.11 KB
/
run_msings.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash
# The call of the script
echo "$0 $@"
echo
# Check if enough number of arguments
if [ $# -ne 11 ]; then
echo "Invalid number of options/arguments.
Usage: $0 [options] in.bam
Options (all required):
-b, --bed FILE msi bed file, specifying the microsatellite regions for the utilized panel
-f, --fasta FILE fasta reference genome
-i, --intervals FILE msi intervals file customized for the utilized panel, for internal program use
-n, --baseline FILE msi baseline file, based on msi negative samples with the utilized panel
-o, --outdir DIR output directory" 1>&2
exit 1
fi
# parse the options
POSITIONAL=()
while [[ $# -gt 0 ]]
do
key="$1"
case $key in
-b|--bed) # msi bed file
BEDFILE="$2"
shift # past argument
shift # past value
;;
-f|--fasta) # fasta reference genome
REF_GENOME="$2"
shift # past argument
shift # past value
;;
-i|--intervals) # msi intervals file
INTERVALS_FILE="$2"
shift # past argument
shift # past value
;;
-n|--baseline) # msi baseline file
MSI_BASELINE="$2"
shift # past argument
shift # past value
;;
-o|--outdir) # output directory
OUTDIR="$2"
shift # past argument
shift # past value
;;
*) # unknown option / positional argument
POSITIONAL+=("$1") # save it in an array for later
shift # past argument
;;
esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters
# bam file to analyze, given as positional argument 1
BAM=$1
# activate the msings virtual environment
source $MSINGSENV/bin/activate # NB! Need to set up the msings variable in alascca-dotfiles
VARSCAN=$MSINGSENV/bin/VarScan.v2.3.7.jar # NB! Need to set up the msings variable in alascca-dotfiles
#"multiplier" is the number of standard deviations from the baseline that is required to call instability
multiplier=2.0
#"msi_min_threshold" is the maximum fraction of unstable sites allowed to call a specimen MSI negative
msi_min_threshold=0.1
#"msi_max_threshold" is the minimum fraction of unstable sites allowed to call a specimen MSI positive
msi_max_threshold=0.1
# Run the mSINGS analysis
BAMNAME=$(basename $BAM)
PFX=${BAMNAME%.*}
mkdir -p $OUTDIR/$PFX
echo
echo “Starting Analysis of $PFX”
date +"%Y-%m-%d %H:%M"
echo
echo "Making mpileup"
date +"%Y-%m-%d %H:%M"
samtools mpileup -f $REF_GENOME -d 100000 -A -E $BAM -l $INTERVALS_FILE | awk '{if($4 >= 6) print $0}' > $OUTDIR/$PFX/$PFX.mpileup # make pileup if depth >= 6 reads
echo
echo "Varscan Readcounts start"
date +"%Y-%m-%d %H:%M"
java -Xmx4g -jar $VARSCAN readcounts $OUTDIR/$PFX/$PFX.mpileup --variants-file $INTERVALS_FILE --min-base-qual 10 --output-file $OUTDIR/$PFX/$PFX.msi_output &
wait
echo
echo "MSI Analyzer start"
date +"%Y-%m-%d %H:%M"
msi analyzer $OUTDIR/$PFX/$PFX.msi_output $BEDFILE -o $OUTDIR/$PFX/$PFX.msi.txt
echo
echo "MSI calls start"
date +"%Y-%m-%d %H:%M"
msi count_msi_samples $MSI_BASELINE $OUTDIR/$PFX -m $multiplier -t $msi_min_threshold $msi_max_threshold -o $OUTDIR/$PFX/$PFX.MSI_Analysis.txt
echo
echo “Completed Analysis of $PFX”
date +"%Y-%m-%d %H:%M"
echo