-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlincRNA_pipeline.sh
115 lines (80 loc) · 4.33 KB
/
lincRNA_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/bin/bash
# Script to process cuffcompare output file to generate lincRNA
# Usage:
# sh lincRNA_pipeline.sh -c cuffcompare_out_annot_no_annot.combined.gtf -g Brapa_sequence_v1.2.fa -r Brassica_rapa_v1.2.cds -b TE_RNA_transcripts.fa
while getopts ":b:c:g:hr:" opt; do
case $opt in
b)
blastfile=$OPTARG
;;
c)
comparefile=$OPTARG
;;
h) echo "USAGE : sh lincRNA_pipeline.sh
-c </path/to/cuffcompare_output file>
-g </path/to/reference genome file>
-r </path/to/reference CDS file>
-b </path/to/RNA file>"
exit 1
;;
g)
referencegenome=$OPTARG
;;
r)
referenceCDS=$OPTARG
;;
\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
wget -O- https://github.com/TransDecoder/TransDecoder/archive/2.0.1.tar.gz | tar xzvf -
makeblastdb -in $blastfile -dbtype nucl -out $blastfile.blast.out
makeblastdb -in $referenceCDS -dbtype nucl -out $referenceCDS.blast.out
grep '"u"' $comparefile | \
gffread -w transcripts_u.fa -g $referencegenome - && \
python2.7 get_gene_length_filter.py transcripts_u.fa transcripts_u_filter.fa && \
TransDecoder-2.0.1/TransDecoder.LongOrfs -t transcripts_u_filter.fa
sed 's/ .*//' transcripts_u_filter.fa | sed -ne 's/>//p' \
> transcripts_u_filter.fa.genes
cd transcripts_u_filter.fa.transdecoder_dir
sed 's/|.*//' longest_orfs.cds | sed -ne 's/>//p' | uniq \
> longest_orfs.cds.genes
grep -v -f longest_orfs.cds.genes ../transcripts_u_filter.fa.genes \
> longest_orfs.cds.genes.not.genes
sed 's/^/>/' longest_orfs.cds.genes.not.genes \
> temp && mv temp longest_orfs.cds.genes.not.genes
python ../extract_sequences.py longest_orfs.cds.genes.not.genes ../transcripts_u_filter.fa longest_orfs.cds.genes.not.genes.fa
sed 's/ /./' longest_orfs.cds.genes.not.genes.fa \
> temp && mv temp longest_orfs.cds.genes.not.genes.fa
blastn -query longest_orfs.cds.genes.not.genes.fa -db ../$blastfile.blast.out -out longest_orfs.cds.genes.not.genes.fa.blast.out -outfmt 6
python ../filter_sequences.py longest_orfs.cds.genes.not.genes.fa.blast.out longest_orfs.cds.genes.not.genes.fa.blast.out.filtered
grep ">" longest_orfs.cds.genes.not.genes.fa | sed 's/>//' \
> longest_orfs.cds.genes.not.genes_only
python ../fasta_remove.py longest_orfs.cds.genes.not.genes.fa.blast.out.filtered longest_orfs.cds.genes.not.genes_only lincRNA.genes
sed 's/^/>/' lincRNA.genes > temp && mv temp lincRNA.genes
python ../extract_sequences-1.py lincRNA.genes longest_orfs.cds.genes.not.genes.fa lincRNA.genes.fa
cap3 lincRNA.genes.fa
cat lincRNA.genes.fa.cap.singlets lincRNA.genes.fa.cap.contigs \
> lincRNA_genes_non_redundant.fa
grep ">" lincRNA_genes_non_redundant.fa \
> lincRNA_gene_non_redundant.genes_only
blastn -query lincRNA_genes_non_redundant.fa -db ../$referenceCDS.blast.out -out lincRNA_non_redundant.fa_cds_blast.out -evalue 1e-30 -outfmt 6
python ../linc_RNA_filter.py lincRNA_non_redundant.fa_cds_blast.out lincRNA_non_redundant.fa_cds_blast.out.filtered
grep -v -f lincRNA_non_redundant.fa_cds_blast.out.filtered lincRNA_gene_non_redundant.genes_only \
> lincRNA_non_redundant_filtered.genes
python ../extract_sequences-1.py lincRNA_non_redundant_filtered.genes lincRNA_genes_non_redundant.fa lincRNA_non_redundant_filtered.genes.fa
grep "^>" lincRNA_non_redundant_filtered.genes.fa \
> lincRNA_non_redundant_filtered.genes.only
makeblastdb -in lincRNA_non_redundant_filtered.genes.fa -dbtype nucl -out lincRNA_non_redundant_filtered_blast.out
blastn -query lincRNA_non_redundant_filtered.genes.fa -db lincRNA_non_redundant_filtered_blast.out -out lincRNA_non_redundant_filtered_blast.out.blast.out -evalue 1e-30 -outfmt 6
python ../linc_RNA_filter-1.py lincRNA_non_redundant_filtered_blast.out.blast.out lincRNA_non_redundant_filtered_blast.out.blast.out.uniq
grep -v -f lincRNA_non_redundant_filtered_blast.out.blast.out.uniq lincRNA_non_redundant_filtered.genes.only \
> lincRNA_non_redundant_filtered.genes.only_filtered
python ../extract_sequences-1.py lincRNA_non_redundant_filtered.genes.only_filtered lincRNA_non_redundant_filtered.genes.fa lincRNA_non_redundant_filtered.genes.only_filtered.fa
python ../fasta_header_rename.py lincRNA_non_redundant_filtered.genes.only_filtered.fa lincRNA_final_transcripts.fa