-
Notifications
You must be signed in to change notification settings - Fork 0
/
scaffolding.py
231 lines (200 loc) · 8.77 KB
/
scaffolding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
__author__ = 'nluhmann'
#scaffold reconstructed adjacencies at each internal node
def scaffoldAdjacencies(reconstructedAdj):
scaffoldsPerNode = {}
for node in reconstructedAdj:
adjacencies = reconstructedAdj[node]
#key: (leftmost marker in scaffold, rightmost marker in scaffold), value: [marker]
scaffold = {}
for adj in adjacencies:
inserted = False
#check if adjacency can extend an existing scaffold, extend only one scaffold if there are two possibilities
for scaff in scaffold.keys():
if adj[0] == scaff[0]:
markerList = scaffold.pop(scaff)
other = getOtherExtremity(adj[1])
markerList.insert(0,adj[1])
markerList.insert(0,other)
scaffold[(other,scaff[1])] = markerList
inserted = True
break
elif adj[0] == scaff[1]:
#remove and return current scaffold
markerList = scaffold.pop(scaff)
#extend markerList with other extremity of the current adjacency
right = getOtherExtremity(adj[1])
markerList.append(adj[1])
markerList.append(right)
#save new scaffold with updated key
scaffold[(scaff[0],right)] = markerList
#adjacency has been inserted
inserted = True
#get out as soon as one scaffold has been extended
break
elif adj[1] == scaff[0]:
markerList = scaffold.pop(scaff)
other = getOtherExtremity(adj[0])
markerList.insert(0, adj[0])
markerList.insert(0,other)
#save new scaffold with updated key
scaffold[(other,scaff[1])] = markerList
inserted = True
break
elif adj[1] == scaff[1]:
markerList = scaffold.pop(scaff)
other = getOtherExtremity(adj[0])
markerList.append(adj[0])
markerList.append(other)
#save new scaffold with updated key
scaffold[(scaff[0],other)] = markerList
inserted = True
break
if not inserted:
#create new scaffold
left = getOtherExtremity(adj[0])
right = getOtherExtremity(adj[1])
scaffold[(left,right)] = [left,adj[0],adj[1],right]
scaffold2 = mergeScaffolds(scaffold)
scaffoldsPerNode[node] = scaffold2
return scaffoldsPerNode
def mergeScaffolds(scaffold):
#then merge scaffolds that have the same border
merg = True
while merg:
merg = False
#case 1: two scaffolds have the same left border (find by sorting keylist!)
keys = scaffold.keys()
keys.sort(key=lambda tup: tup[0])
for i in range(0,len(keys)-1):
if keys[i][0] == keys[i+1][0]-1 and keys[i][0] % 2 == 1:
#merge scaffolds
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[i+1])
#reverse first list, then chop of two last elements, then merge lists
rev = firstList[::-1]
revChopped = rev[:-2]
revChopped += secondList
scaffold[(revChopped[0],revChopped[-1])] = revChopped
merg = True
#case 2: two scaffolds have the same right border (find by sorting keylist!)
keys = scaffold.keys()
keys.sort(key=lambda tup: tup[1])
for i in range(0,len(keys)-1):
if keys[i][1] == keys[i+1][1]-1 and keys[i][1] % 2 == 1:
#merge scaffolds
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[i+1])
#reverse first list, then chop of two last elements, then merge lists
rev2 = firstList[::-1]
revChopped2 = rev2[2:]
secondList += revChopped2
scaffold[(secondList[0],secondList[-1])] = secondList
merg = True
#case 3,4: the higher,lower extremity is at the right border, the lower,higher at the left (we have to compare all of them here)
merged = True
while merged:
merged = False
keys = scaffold.keys()
for i in range(0,len(keys)-1):
changed = False
for j in range(i+1,len(keys)):
if keys[i][1] == keys[j][0]+1 and keys[i][1] % 2 == 0:
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[j])
secondChop = secondList[2:]
firstList += secondChop
scaffold[(firstList[0],firstList[-1])] = firstList
changed = True
break
elif keys[i][0] == keys[j][1]-1 and keys[i][0] % 2 == 1:
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[j])
firstChop = firstList[2:]
secondList += firstChop
scaffold[(secondList[0],secondList[-1])] = secondList
changed = True
break
elif keys[i][1] == keys[j][0]-1 and keys[i][1] % 2 == 1:
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[j])
secondChop = secondList[2:]
firstList += secondChop
scaffold[(firstList[0],firstList[-1])] = firstList
changed = True
break
elif keys[i][0] == keys[j][1]+1 and keys[i][0] % 2 == 0:
firstList = scaffold.pop(keys[i])
secondList = scaffold.pop(keys[j])
firstChop = firstList[2:]
secondList += firstChop
scaffold[(secondList[0],secondList[-1])] = secondList
changed = True
break
if changed:
merged = True
break
return scaffold
def getOtherExtremity(extremity):
if int(extremity) % 2 == 0:
other = int(extremity) - 1
elif int(extremity) % 2 == 1:
other = int(extremity) + 1
else:
print "Houston there is a problem!"
return other
def undoubleScaffolds(scaffoldsPerNode):
undoubled = {}
for node in scaffoldsPerNode:
undoubled[node] = []
scaffolds = scaffoldsPerNode[node]
for sc in scaffolds:
scaffold = []
scaff = scaffolds[sc]
for i in range(0,len(scaff)-1,2):
first = int(scaff[i])
second = int(scaff[i+1])
if first - second == -1:
mark = str(second/2)
elif first - second == 1:
marker = first/2
mark = "-"+str(marker)
else:
print "Houston there is a problem!"
scaffold.append(mark)
undoubled[node].append(scaffold)
return undoubled
def outputUndoubledScaffolds(scaffolds,output):
file = open(output,"w")
for node in scaffolds:
file.write(">"+node+" "+str(len(scaffolds[node]))+"\n")
counter = 1
for scaff in scaffolds[node]:
file.write("#"+str(counter)+"\n")
file.write(" ".join(str(x) for x in scaff)+" $\n")
counter += 1
file.close()
def outputScaffolds(scaffolds,output):
file = open(output,"w")
for node in scaffolds:
file.write(">"+node+" "+str(len(scaffolds[node]))+"\n")
counter = 1
for scaff in scaffolds[node]:
file.write("#"+str(counter)+"\n")
file.write(" ".join(str(x) for x in scaffolds[node][scaff])+" $\n")
counter += 1
file.close()
def sanityCheckScaffolding(undoubledScaffolds):
log=""
for node in undoubledScaffolds:
scaffolds = undoubledScaffolds[node]
testHash = {}
for scaff in scaffolds:
for elem in scaff:
if "-" in str(elem):
elem = elem[1:]
if elem in testHash:
log+=str(node)+"\n"
log+=str(elem)+"\n"
else:
testHash[elem] = 1
return log