-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctional_annotation_team2.py
153 lines (126 loc) · 6.5 KB
/
functional_annotation_team2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python3
import sys
import os
import re
import subprocess
import argparse
from domain.interpro_annotation import annotation_interproscan
from domain.eggNOG_annotation import eggnog_act
from domain.cvt_egg2gff import convert_eggnog
from domain.interpro_post_acts import interproscan_modify
from cluster.clustering_and_mapping import merge_file_map, relabel, mapping_back, merge
from domain.annotation_one_line import ol_act
from sp.signalprun import signalP_finding
from sp.signalprun import signalP_finding
from crt.CRT import crt_act
from crt.convert_crt_to_gff import convert_crt
from bedtools_scripts.bedtools_sort import sort
from operon.get_gff import convert_to_gff
from cluster.add_ids import add_ids_act
def main():
parser = argparse.ArgumentParser(description='Functional annotation')
parser.add_argument('-i', '--input', help='Input directory with 50 fna file', default=sys.stdin, type=str, required=True)
parser.add_argument('-ni', '--nucleotide_input', help='Input directory with fna files', type=str, default=False)
parser.add_argument('-e', '--eggnog', help='Search against eggnog', default=False)
parser.add_argument('-sp','--signalP', help='Running signalP to annotate signal peptide', default=False)
parser.add_argument('-tm','--tmprotein', help='Running tmhmm to annotate transmembrane proteins', default=False)
parser.add_argument('-crt','--crispr', help='Running CRISPR annotatation', default=False)
parser.add_argument('-ard','--antibiotic',help='Running antibiotic annotatation', default=False)
parser.add_argument('-ol','--one_line', help='One line annotation with gene names', action='store_true')
parser.add_argument('-v', '--verbose', help='Verbose mode', default=False)
parser.add_argument('-op', '--operon', help='operon annotation', default=False)
args = parser.parse_args()
#Generate cluster fasta file and uc file
Input_directory = args.input
Cluster_path = './Cluster'
Cluster_path2fastafile, Cluster_path2ucfile = relabel(Input_directory, Cluster_path)
# Cluster_path2fastafile = './Cluster/ProdigalCluster_97_n.fasta'
Dir_cluster = []
Dir_merge = []
# Dir_merge.append('./Prod_RNA_Results')
if args.verbose:
print("Interproscan is running for domain analysis")
os.system('mkdir ./Interproscan')
Dir_cluster.append('./Interproscan')
output_for_interproscan = './Interproscan/97_interpro.gff'
output_for_final = './Interproscan/97_interpro_trimmed.gff'
# print(Cluster_path2fastafile,output_for_interproscan)
annotation_interproscan(Cluster_path2fastafile,output_for_interproscan)
interproscan_modify(Cluster_path2fastafile,output_for_interproscan,output_for_final)
os.system('rm ./Interproscan/97_interpro.gff')
if args.eggnog:
if args.verbose:
print("eggnog is running for domain analysis")
os.system('mkdir ./EggNOG')
Dir_cluster.append('./EggNOG')
output_for_eggnog = './EggNOG/97_eggnog.gff'
eggnog_act(Cluster_path2fastafile,output_for_eggnog)
convert_eggnog(Cluster_path2fastafile,output_for_eggnog)
## ------------------------- Tool Script here -------------------------##
# Cluster_path2fastafile is the cluster_centriod file
# Input_directory is the Prodigal_fasta directory containing 50 faa file
# Signal peptide annotation
if args.signalP:
if args.verbose:
print("signalP is running for signal peptide annotation")
signalP_finding(Input_directory)
Dir_merge.append('./signalp')
# Transmembrane protein annotation
if args.tmprotein:
if args.verbose:
print("tmhmm is running for transmembrane protein annotation")
os.system("bash ./tm/tmhmm.sh " + Input_directory)
Dir_merge.append('./tmhmm')
# CRISPR annotation
if args.crispr:
if args.nucleotide_input:
if args.verbose:
print("CRISPR annotation is running")
crt_act(args.nucleotide_input)
convert_crt(args.nucleotide_input)
else: print("nucleotide fna needed")
# Antibiotic resistance annotation
if args.antibiotic:
if args.verbose:
print("Anbiotic resistance annotation is running")
os.system("chmod 755 ./ard/ard_run.sh")
os.system("./ard/ard_run.sh -i " + Cluster_path2fastafile)
Dir_cluster.append('./victors')
Dir_cluster.append('./vfdb')
Dir_cluster.append('./rgi')
#Operon annotation
if args.operon:
if args.verbose:
print("Operon annotation is running")
os.system("/projects/team2/func_annotation/tools/ncbi-blast-2.8.1+/bin/makeblastdb -in /projects/team2/func_annotation/Operon/operon_ref.fasta -dbtype prot")
os.system("mkdir operon_final_result")
os.system("chmod 755 operon_final_result")
os.system("/projects/team2/func_annotation/tools/blastp -db /projects/team2/func_annotation/Operon/operon_ref.fasta -query " + Cluster_path2fastafile + " -num_threads 4 -evalue 1e-10 -outfmt \"6 stitle qseqid sseqid sstart send qcovs bitscore score evalue sstrand \" > ./operon_final_result/operon_output")
convert_to_gff(Cluster_path2fastafile) #output file: ./operon_final_result/97_operon.gff
Dir_cluster.append('./operon_final_result')
## ------------------------- Tool Script end -------------------------##
Output_gff_path = './Func_annotation_result'
mapping_back(Dir_cluster, Cluster_path, Output_gff_path)
Position_map = merge_file_map('./' + Input_directory)
merge(Dir_merge, Output_gff_path, Position_map)
# merge(Dir_merge, Output_gff_path)
## ------------------------- Tool Script end -------------------------##
# for files in os.listdir('./Func_annotation_result'):
# files = './Func_annotation_result/' + files
# os.system('sort ' + files + ' > ' + files +'_sorted.gff')
# os.system('rm '+ files)
for files in os.listdir('./Func_annotation_result'):
files = './Func_annotation_result/' + files
add_ids_act(files,files[:-4]+'_sorted.gff')
os.system('rm '+ files)
## ------------------------- One Line Annotation ---------------------##
if args.one_line:
os.system('ls ./Func_annotation_result/* > ./gff_reference')
os.system('ls ' + Input_directory + '/* > ./fastas')
file_reference = "./gff_reference"
file_fastas = "./fastas"
ol_act(file_reference,file_fastas)
os.system('rm ./gff_reference')
os.system('rm ./fastas')
if __name__ == "__main__":
main()