-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathreport.mk
executable file
·126 lines (108 loc) · 4.88 KB
/
report.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/make -rRsf
# Shebang flags: -r disables built-in rules, -R disables built-in variables,
# -s suppresses command echo, -f reads this file as the makefile.
# Recipes run under bash with `pipefail`, so a pipeline fails if any stage of it fails.
SHELL=/bin/bash -o pipefail
#USAGE:
#
# $HOME/Oyster_River/Protocol/report.mk main CPU=24 \
# ASSEMBLY=test.fasta \
# READ1=1.subsamp_1.cor.fq \
# READ2=1.subsamp_2.cor.fq \
# LINEAGE=eukaryota_odb9 \
# RUNOUT=test
#
# ---------------------------------------------------------------------------
# Configuration.
# CPU, MEM, READ1, READ2, RUNOUT, ASSEMBLY and LINEAGE are the user-tunable
# knobs; command-line assignments (e.g. `CPU=8`) override the defaults below.
# NOTE: keep comments on their own lines -- trailing text/whitespace after a
# value would become part of the value.
# ---------------------------------------------------------------------------

# Directory containing this makefile (as returned by $(dir), i.e. with a trailing slash).
MAKEDIR := $(dir $(firstword $(MAKEFILE_LIST)))
# Working directory make was started in; reports and stamp files are written below it.
DIR := ${CURDIR}
CPU=24
MEM=120
READ1=
READ2=
# Location of the legacy run_BUSCO.py launcher, if any. stderr is discarded so a
# missing launcher does not print `which` noise every time the file is parsed.
BUSCO := ${shell which run_BUSCO.py 2>/dev/null}
BUSCODIR := $(dir $(firstword $(BUSCO)))
RUNOUT =
ASSEMBLY=
LINEAGE=
BUSCODBDIR := ${MAKEDIR}/busco_dbs/
# File name of the assembly with its directory and a trailing `.fasta` removed,
# computed with make built-ins instead of forking `basename`.
BUSCOUT := BUSCO_$(patsubst %.fasta,%,$(notdir ${ASSEMBLY}))
# Tool lookups consumed by the `check` goal; each is empty when the tool is not on PATH.
salmonpath := $(shell which salmon 2>/dev/null)
# The BUSCO recipe invokes `busco`, so accept that executable first and fall
# back to the older `run_BUSCO.py` name for existing installations.
buscopath := $(or $(shell which busco 2>/dev/null),$(shell which run_BUSCO.py 2>/dev/null))
transratepath := $(shell which transrate 2>/dev/null)
# Exported so that child processes started by recipes see the config file path.
BUSCO_CONFIG_FILE := ${MAKEDIR}/software/config.ini
export BUSCO_CONFIG_FILE
# Version string printed by the `help` and `welcome` banners.
VERSION := ${shell cat ${MAKEDIR}version.txt}
# ---------------------------------------------------------------------------
# Goal declarations.
# `help` is the first target in the file and therefore the default goal.
# `main` runs the whole report pipeline through the goals listed as its
# prerequisites; the short goal names below map onto the stamp/output files
# that the actual rules produce.
# ---------------------------------------------------------------------------
help:
main: setup check welcome diamond busco transrate strandeval reportgen
diamond:${DIR}/reports/${RUNOUT}.unique.txt
busco:${DIR}/reports/${RUNOUT}.busco.done
transrate:${DIR}/reports/${RUNOUT}.transrate.done
clean:
setup:${DIR}/setup.done
# NOTE(review): the prerequisite below is spelled `{DIR}` (no `$`), so it is a
# literal path component rather than the DIR variable. It is left unchanged
# here because it must stay textually identical to a target defined by the
# rule that builds it.
strandeval:{DIR}/reports/${RUNOUT}.strandeval.done
# Remove a target whose recipe failed so a partial file never looks up to date.
.DELETE_ON_ERROR:
# Every goal that is a command name rather than a file is declared phony, so a
# stray file called e.g. `help`, `main` or `reportgen` cannot silently disable it.
# (`report` is kept from the original declaration for backward compatibility.)
.PHONY: report help main setup check welcome diamond busco transrate strandeval reportgen clean
# Stamp rule behind the `setup` goal: create the reports directory next to the
# stamp, then record completion by touching the stamp itself.
${DIR}/setup.done:
	@mkdir -p $(@D)/reports
	touch $@
# Tool sanity check. Each `*path` variable holds the result of a `which`
# lookup and is empty when the tool was not found; for every empty one an
# $(error ...) line is added to this recipe, which aborts make when it is expanded.
check:
ifndef salmonpath
	$(error "\n\n*** SALMON is not installed, must fix ***")
endif
ifndef transratepath
	$(error "\n\n*** TRANSRATE is not installed, must fix ***")
endif
ifndef buscopath
	$(error "\n\n*** BUSCO is not installed, must fix ***")
endif
# Print the welcome banner, the tool version and an example invocation.
# All printf calls are chained in one shell; the chain stops at the first failure.
help:
	printf "\n\n***** Welcome to the Oyster River Report Generation Tool ***** \n" && \
	printf "***** This is version ${VERSION} *****\n\n" && \
	printf "Usage:\n\n" && \
	printf "/path/to/Oyster_River/Protocol/report.mk main CPU=24\n" && \
	printf "ASSEMBLY=test.fasta\n" && \
	printf "LINEAGE=eukaryota_odb9\n" && \
	printf "READ1=1.subsamp_1.cor.fq\n" && \
	printf "READ2=1.subsamp_2.cor.fq\n" && \
	printf "RUNOUT=test\n\n"
# Banner shown at the start of a `main` run: tool name followed by the version string.
welcome:
	printf "\n\n***** Welcome to the Oyster River Report Generation Tool ***** \n" && \
	printf "***** This is version ${VERSION} ***** \n\n " && \
	printf " \n\n"
# BUSCO step: run busco in offline transcriptome mode on ${ASSEMBLY} against
# the ${LINEAGE} database under ${MAKEDIR}/busco_dbs, move the resulting
# run_${RUNOUT}* output into the reports directory, then touch the stamp.
# Thread count: BUSCO_THREADS when the caller provides it (command line or
# environment), otherwise CPU -- previously an unset BUSCO_THREADS left
# `--cpu` without a value.
${DIR}/reports/${RUNOUT}.busco.done:${ASSEMBLY}
	python $$(which busco) --offline --lineage ${MAKEDIR}/busco_dbs/${LINEAGE} -i ${ASSEMBLY} -m transcriptome --cpu $(or ${BUSCO_THREADS},${CPU}) -o run_${RUNOUT} --config ${MAKEDIR}/software/config.ini
	mv run_${RUNOUT}* ${DIR}/reports/
	touch $@
# TransRate step: run the bundled orp-transrate binary on the assembly with
# the paired reads, writing into reports/transrate_${RUNOUT}; touch the stamp,
# then delete any files matching *bam below that output directory.
${DIR}/reports/${RUNOUT}.transrate.done:${ASSEMBLY}
	${MAKEDIR}/software/orp-transrate/transrate \
		-o ${DIR}/reports/transrate_${RUNOUT} \
		-a ${ASSEMBLY} \
		--left ${READ1} \
		--right ${READ2} \
		-t ${CPU}
	touch $@
	find ${DIR}/reports/transrate_${RUNOUT}/ -name "*bam" -delete
# Diamond step: blastx the assembly against the bundled swissprot database
# into reports/${RUNOUT}.diamond.txt. The target file then receives a single
# number: the count of distinct values obtained by taking column 2 of the hit
# table, its third `|`-separated field, and that field's second `_`-separated part.
${DIR}/reports/${RUNOUT}.unique.txt:${ASSEMBLY}
	diamond blastx -p ${CPU} -e 1e-8 --top 0.1 \
		-q ${ASSEMBLY} \
		-d ${MAKEDIR}/software/diamond/swissprot \
		-o ${DIR}/reports/${RUNOUT}.diamond.txt
	awk '{print $$2}' ${DIR}/reports/${RUNOUT}.diamond.txt \
		| awk -F "|" '{print $$3}' \
		| cut -d _ -f2 \
		| sort \
		| uniq \
		| wc -l > $@
# Remove the per-run outputs under reports/ so the steps run again.
# The stamp names carry the ${RUNOUT} prefix, exactly as the rules that touch
# them spell it; the un-prefixed names removed previously never matched any
# file this makefile creates. The strandeval stamp and the intermediate
# diamond hit table are removed as well.
clean:
	rm -fr ${DIR}/reports/${RUNOUT}.busco.done \
		${DIR}/reports/${RUNOUT}.transrate.done \
		${DIR}/reports/${RUNOUT}.strandeval.done \
		${DIR}/reports/${RUNOUT}.unique.txt \
		${DIR}/reports/${RUNOUT}.diamond.txt \
		${DIR}/reports/run_${RUNOUT} \
		${DIR}/reports/transrate_${RUNOUT}/
# Compatibility alias: this rule used to be keyed on a path spelled with a
# literal `{DIR}` (missing `$`), which never matched the stamp the recipe
# touches, so the step re-ran on every invocation. Anything still referring to
# the literal spelling is forwarded to the real stamp file.
{DIR}/reports/${RUNOUT}.strandeval.done: ${DIR}/reports/${RUNOUT}.strandeval.done

# Strand evaluation step:
#   1. index the assembly with bwa (index prefix ${RUNOUT}, written to the
#      current directory);
#   2. map 200000 reads sampled by seqtk from each of READ1/READ2 with a fixed
#      seed, and sort the alignments into ${RUNOUT}.sorted.bam (the `<(...)`
#      process substitutions require bash, which SHELL selects);
#   3. run examine_strand.pl, using the PerlLib directory next to the Trinity
#      executable found on PATH;
#   4. draw a histogram of column 5 of ${RUNOUT}.dat (first line skipped);
#   5. delete the BAM, touch the stamp and print a pointer to the documentation.
# The stamp depends on the assembly, consistent with the other stamp rules.
${DIR}/reports/${RUNOUT}.strandeval.done:${ASSEMBLY}
	bwa index -p ${RUNOUT} ${ASSEMBLY}
	bwa mem -t $(CPU) ${RUNOUT} \
		<(seqtk sample -s 23894 ${READ1} 200000) \
		<(seqtk sample -s 23894 ${READ2} 200000) \
		| samtools view -@10 -Sb - \
		| samtools sort -T ${RUNOUT} -O bam -@10 -o "${RUNOUT}".sorted.bam -
	perl -I $$(dirname $$(readlink -f $$(which Trinity)))/PerlLib ${MAKEDIR}/scripts/examine_strand.pl "${RUNOUT}".sorted.bam ${RUNOUT}
	hist -p '#' -c red <(cat ${RUNOUT}.dat | awk '{print $$5}' | sed 1d)
	rm -f "${RUNOUT}".sorted.bam
	touch $@
	printf "\n\n***** See the following link for interpretation ***** \n"
	printf "***** https://oyster-river-protocol.readthedocs.io/en/latest/strandexamine.html ***** \n\n"
# Print the quality summary for ${RUNOUT} and append the scored lines to
# reports/qualreport.${RUNOUT} (tee -a, so repeated runs accumulate).
#   - BUSCO: line 8 of the `short*` file(s) found under reports/run_${RUNOUT}
#   - TransRate: columns 37 and 38 of the second line of assemblies.csv
#   - unique count: the contents of reports/${RUNOUT}.unique.txt
# Files are read with `find ... -exec cat {} +` rather than `cat $$(find ...)`:
# when find matches nothing the old form ran a bare `cat`, which blocks
# waiting on stdin; the new form prints nothing, and a missing directory makes
# the pipeline fail (pipefail) instead of hanging.
reportgen:
	printf "\n\n***** QUALITY REPORT FOR: ${RUNOUT} ****"
	printf "\n***** THE ASSEMBLY CAN BE FOUND HERE: ${ASSEMBLY} **** \n\n"
	printf "***** BUSCO SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/run_${RUNOUT} -name 'short*' -exec cat {} + | sed -n 8p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf "***** TRANSRATE SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/transrate_${RUNOUT} -name assemblies.csv -exec cat {} + | awk -F , '{print $$37}' | sed -n 2p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf "***** TRANSRATE OPTIMAL SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/transrate_${RUNOUT} -name assemblies.csv -exec cat {} + | awk -F , '{print $$38}' | sed -n 2p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf "***** UNIQUE GENES ~~~~~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	cat ${DIR}/reports/${RUNOUT}.unique.txt | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf " \n\n"