-
Notifications
You must be signed in to change notification settings - Fork 14
/
orthofuser.mk
executable file
·109 lines (91 loc) · 5.65 KB
/
orthofuser.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/make -rRsf
# Recipes run under bash with pipefail, so a failing stage anywhere in a
# pipeline fails the whole recipe line.
SHELL=/bin/bash -o pipefail
#USAGE:
#
# orthofuser.mk all READ1= READ2= CPU= RUNOUT= FASTADIR= LINEAGE=
#
# Directory holding this makefile (with trailing slash, as returned by $(dir)).
MAKEDIR := $(dir $(firstword $(MAKEFILE_LIST)))
# Directory make was invoked from; every output path below is rooted here.
DIR := ${CURDIR}
# Thread count handed to the external tools.
CPU=16
# Paired-end read files, forwarded to transrate (--left/--right) and salmon (-1/-2).
READ1=
READ2=
# Location of the BUSCO launcher found on PATH, and the directory containing it.
BUSCO := ${shell which run_BUSCO.py}
BUSCODIR := $(dir $(firstword $(BUSCO)))
# Run name; embedded in every output file and directory name.
RUNOUT =
# LINEAGE and BUSCODB are accepted but not referenced by the rules in this file.
LINEAGE=
BUSCODB :=
# File name of READ1 without its directory part.
# $(notdir) replaces `$(shell basename ...)`: no shell is forked at parse time
# and an unset READ1 no longer prints "basename: missing operand".
INPUT := $(notdir ${READ1})
# Directory whose files are the input assemblies to merge.
FASTADIR=
# Exported to the environment of every recipe (read by BUSCO).
BUSCO_CONFIG_FILE := ${MAKEDIR}/software/config.ini
export BUSCO_CONFIG_FILE
# Contents of version.txt that sits next to this makefile.
VERSION := ${shell cat ${MAKEDIR}version.txt}
# Short stage names that map onto the real file targets they produce.
setup: ${DIR}/ortho_setup.done
merge: ${DIR}/orthofuse/${RUNOUT}/merged.fasta
orthotransrate: ${DIR}/orthofuse/${RUNOUT}/orthotransrate.done
orthofusing: ${DIR}/assemblies/${RUNOUT}.orthomerged.fasta
cdhit: ${DIR}/assemblies/${RUNOUT}.ORP.fasta
busco: ${DIR}/reports/${RUNOUT}.busco.done
transrate: ${DIR}/reports/${RUNOUT}.transrate.done
salmon: ${DIR}/quants/salmon_orthomerged_${RUNOUT}/quant.sf
reportgen:

# Whole pipeline, stages listed in the order they are meant to run.
all: setup merge orthotransrate orthofusing cdhit busco transrate salmon reportgen

# Remove a target whose recipe failed so a partial file never looks up to date.
.DELETE_ON_ERROR:

# None of these names is a file. Declaring them phony keeps a stray file or
# directory called e.g. `reportgen` or `all` from suppressing the stage.
# `report` is kept only because the original declaration listed it.
.PHONY: all setup merge orthotransrate orthofusing cdhit busco transrate salmon reportgen report
# Stamp rule: create the three top-level output directories, then record
# completion by touching the stamp file.
$(DIR)/ortho_setup.done:
	@mkdir -p $(DIR)/reports $(DIR)/quants $(DIR)/assemblies
	touch $@
# Build the merged assembly:
#   1. run scripts/long.seq.py (with argument 200) on every file in FASTADIR,
#      writing <name>.short.fasta into the working directory;
#   2. run orthofuser.py on that working directory inside the py27 environment;
#   3. concatenate all *short.fasta files into the target.
${DIR}/orthofuse/${RUNOUT}/merged.fasta:
	$(if $(strip ${FASTADIR}),,$(error FASTADIR is empty - pass FASTADIR=<directory of input assemblies>))
	mkdir -p $(@D)/working
	# Glob instead of parsing `ls`; abort on the first file that fails rather
	# than carrying on with an incomplete working directory.
	for fasta in ${FASTADIR}/*; do \
		python ${MAKEDIR}/scripts/long.seq.py "$$fasta" "$(@D)/working/$$(basename "$$fasta").short.fasta" 200 || exit 1; \
	done
	# Chained with && so a failing orthofuser.py fails the recipe. The original
	# re-activated orp_v2 afterwards, which masked that exit status and had no
	# lasting effect because this line runs in its own shell.
	source ${MAKEDIR}/software/anaconda/install/bin/activate py27 && \
		python $$(which orthofuser.py) -I 4 -f $(@D)/working/ -og -t $(CPU) -a $(CPU)
	cat $(@D)/working/*short.fasta > $@
# Split Orthogroups.txt into one <line-number>.groups file per line (first
# space-separated token dropped, remaining tokens one per line), then score the
# merged assembly with transrate and touch the stamp file.
${DIR}/orthofuse/${RUNOUT}/orthotransrate.done:${DIR}/orthofuse/${RUNOUT}/merged.fasta
	# Locate the file once and fail loudly if it is absent: `wc -l` with no
	# file operand would otherwise wait on stdin. If several copies exist the
	# first in sorted order is used. Line numbers are streamed from `seq`
	# instead of a scratch file in the current directory.
	ORTHOINPUT=$$(find $(@D)/working/ -name Orthogroups.txt 2> /dev/null | sort | head -n 1); \
	if [ -z "$$ORTHOINPUT" ]; then echo "Orthogroups.txt not found under $(@D)/working/" >&2; exit 1; fi; \
	END=$$(wc -l < "$$ORTHOINPUT") && \
	seq 1 $$END | parallel -j $(CPU) -k "sed -n ''{}'p' $$ORTHOINPUT | tr ' ' '\n' | sed '1d' > $(@D)/{1}.groups"
	transrate -o $(@D)/merged -t $(CPU) -a $< --left ${READ1} --right ${READ2}
	touch $@
# For every *.groups file, grep its entries (whole-word) out of transrate's
# contigs.csv, keep column 1 of the row with the largest column 14, and collect
# those names in good.list. filter.py then receives merged.fasta and good.list
# and its stdout becomes the target. Intermediate files are deleted on the way.
${DIR}/assemblies/${RUNOUT}.orthomerged.fasta:${DIR}/orthofuse/${RUNOUT}/orthotransrate.done
	# Do not rely on the setup stage having created the output directory.
	@mkdir -p $(@D)
	echo All the text files are made, start GREP
	find $(<D)/ -name '*groups' 2> /dev/null | parallel -j $(CPU) "grep -wf {} $$(find $(<D)/ -name contigs.csv 2> /dev/null) > {1}.orthout 2> /dev/null"
	echo About to delete all the text files
	find $(<D)/ -name '*groups' -delete
	echo Search output files
	# Truncate rather than append: a good.list left behind by an earlier
	# failed run must not leak into this one.
	find $(<D)/ -name '*orthout' 2> /dev/null | parallel -j $(CPU) "awk -F, -v max=0 '{if(\$$14>max){want=\$$1; max=\$$14}}END{print want}'" > $(<D)/good.list
	find $(<D)/ -name '*orthout' -delete
	python ${MAKEDIR}/scripts/filter.py $(<D)/merged.fasta $(<D)/good.list > $@
	rm $(<D)/good.list
# Run cd-hit-est (-c .98) on the orthomerged assembly to produce the ORP
# assembly, run diamond blastx on it against the bundled swissprot database,
# write the number of distinct values extracted from the hit column to
# <RUNOUT>.unique.ORP.txt, and finally remove cd-hit's .clstr side file.
$(DIR)/assemblies/$(RUNOUT).ORP.fasta: $(DIR)/assemblies/$(RUNOUT).orthomerged.fasta
	cd $(@D)/ && cd-hit-est -M 5000 -T $(CPU) -c .98 -i $< -o $@
	diamond blastx -p $(CPU) -e 1e-8 --top 0.1 -q $@ -d $(MAKEDIR)/software/diamond/swissprot -o $(@D)/$(RUNOUT).ORP.diamond.txt
	awk '{print $$2}' $(@D)/$(RUNOUT).ORP.diamond.txt | awk -F "|" '{print $$3}' | cut -d _ -f2 | sort -u | wc -l > $(@D)/$(RUNOUT).unique.ORP.txt
	rm $@.clstr
# Run BUSCO in transcriptome mode on the ORP assembly. BUSCO writes
# run_<RUNOUT> into the current directory; it is then moved under reports/
# and the stamp file is touched.
${DIR}/reports/${RUNOUT}.busco.done:${DIR}/assemblies/${RUNOUT}.ORP.fasta
	@mkdir -p $(@D)
	# Clear any result from a previous run first; otherwise the `mv` below
	# fails or nests inside the existing non-empty directory on a re-run.
	rm -rf $(@D)/run_${RUNOUT}
	python $$(which run_BUSCO.py) -i $< -m transcriptome -f --cpu $(CPU) -o ${RUNOUT}
	mv run_${RUNOUT} $(@D)/
	touch $@
# Score the ORP assembly against the reads with transrate, writing into
# reports/transrate_<RUNOUT>, then touch the stamp. The prerequisite is the
# BUSCO stamp, so this stage only runs after BUSCO.
$(DIR)/reports/$(RUNOUT).transrate.done: $(DIR)/reports/$(RUNOUT).busco.done
	transrate -o $(@D)/transrate_$(RUNOUT) \
		-a $(DIR)/assemblies/$(RUNOUT).ORP.fasta \
		--left $(READ1) --right $(READ2) \
		-t $(CPU)
	touch $@
# Build a salmon index for the ORP assembly in the current directory,
# quantify the reads against it into the target's directory, then delete
# the index.
$(DIR)/quants/salmon_orthomerged_$(RUNOUT)/quant.sf: $(DIR)/reports/$(RUNOUT).transrate.done
	salmon index --no-version-check \
		-t $(DIR)/assemblies/$(RUNOUT).ORP.fasta \
		-i $(RUNOUT).ortho.idx --type quasi -k 31
	salmon quant --no-version-check -p $(CPU) \
		-i $(RUNOUT).ortho.idx --seqBias --gcBias -l a \
		-1 $(READ1) -2 $(READ2) \
		-o $(@D)
	rm -fr $(RUNOUT).ortho.idx
# Print a short quality summary to the terminal and append it to
# reports/qualreport.<RUNOUT>:
#   - line 8 of the BUSCO short* summary file,
#   - columns 37 and 38 of the second row of transrate's assemblies.csv.
# `find ... -exec cat {} +` replaces `cat $(find ...)`: when nothing is found
# no `cat` is started, so the recipe cannot block waiting on stdin. Paths are
# anchored on DIR like every other rule in this file.
reportgen:
	printf "\n\n***** QUALITY REPORT FOR: ${RUNOUT} **** \n\n"
	printf "***** BUSCO SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/run_${RUNOUT} -name 'short*' -exec cat {} + | sed -n 8p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf "***** TRANSRATE SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/transrate_${RUNOUT} -name assemblies.csv -exec cat {} + | awk -F , '{print $$37}' | sed -n 2p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf "***** TRANSRATE OPTIMAL SCORE ~~~~~> " | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	find ${DIR}/reports/transrate_${RUNOUT} -name assemblies.csv -exec cat {} + | awk -F , '{print $$38}' | sed -n 2p | tee -a ${DIR}/reports/qualreport.${RUNOUT}
	printf " \n\n"
	printf " \n Orthofuser complete \n"
	# NOTE(review): this line runs in its own shell, so it presumably has no
	# effect on the caller's environment - confirm before removing.
	source ${MAKEDIR}/software/anaconda/install/bin/deactivate