-
Notifications
You must be signed in to change notification settings - Fork 1
/
blast-nest-filter.py
75 lines (46 loc) · 1.05 KB
/
blast-nest-filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
#from R02.4 filter blast results to remove nested hits
import sys
f = open(sys.argv[1],'r') #input blast
g = open(sys.argv[2],'w') #output blast
contig_col = int(sys.argv[3])
start_col = int(sys.argv[4])
end_col = int(sys.argv[5])
filename=sys.argv[1].split("/")[-1].split(".")[0]
#read blast into dict
blast={}
#f.readline() #skip header
for i in f:
k=i.split("\t")
contig = k[contig_col]
x= int(k[start_col])
y= int(k[end_col])
if y<x:
start=y
end=x
else:
start=x
end=y
if contig not in blast.keys():
blast[contig]=[(start,end,i)]
else:
blast[contig].append((start,end,i))
print(contig,start,end)
#sort blast by contigs, sstart
for i in blast.keys():
blast[i].sort(key=lambda x: x[0])
for i in blast.keys(): #contigs
n=-1
nests=[]
for j in blast[i]:
n=n+1
for p in blast[i]:
if j[2]!=p[2]:
if j[0]>=p[0] and j[1]<=p[1]:
nests.append(n)
print(nests)
c=-1
for v in blast[i]:
c=c+1
if c not in nests:
g.write(v[2]) #if not nested write to output