-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathclump_finding_problem.py
41 lines (26 loc) · 1.08 KB
/
clump_finding_problem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
## Python function to find all distinct k-mers forming (L, t)-clumps in Genome given string Genome, and integers k, L, and t.
import itertools
from collections import Counter
def frequent_words_problem(string, k):
    """Count every length-``k`` substring (k-mer) of ``string``.

    Args:
        string: Text to scan. A ``str`` is expected; a list of
            single-character strings also works because each slice is
            joined back into a ``str`` before counting.
        k: Length of the substrings to count.

    Returns:
        A list of ``(k_mer, count)`` tuples ordered from most to least
        frequent (``Counter.most_common()`` order, so equal counts keep
        first-seen order). The list is empty when ``k < 1`` or when
        ``k`` exceeds ``len(string)``.
    """
    if k < 1:
        # A zero- or negative-length "word" is meaningless; report none.
        return []
    # Visit only the start positions that leave room for a full k-mer,
    # so no per-slice length check is needed. When k > len(string) the
    # range is empty and the result is [].
    last_start = len(string) - k
    return Counter(
        "".join(string[i:i + k]) for i in range(last_start + 1)
    ).most_common()
#string = "".join(open('frequent_words_problem_text.txt')).split()
#frequent_words_problem(string[0], int(string[1]))
def clump_finding_problem(string, k, L, t):
    """Find all distinct k-mers forming (L, t)-clumps in ``string``.

    A k-mer forms an (L, t)-clump when some length-``L`` window of
    ``string`` contains it at least ``t`` times (overlapping occurrences
    count).

    Args:
        string: Genome text. A ``str`` is expected; a list of
            single-character strings is accepted as well.
        k: Length of the k-mers to look for.
        L: Length of the window in which occurrences are counted.
        t: Minimum number of occurrences inside one window.

    Returns:
        A list of the distinct qualifying k-mers, in no particular order.
        Empty when ``k < 1``, when ``L < k``, or when ``string`` is
        shorter than ``L`` (no complete window exists).
    """
    # Normalise to str so k-mers are hashable even if a list of
    # characters was passed; the value of a str input is unchanged.
    genome = "".join(string)
    n = len(genome)
    if k < 1 or L < k or n < L:
        return []

    # Number of k-mer start positions contained in one length-L window.
    starts_per_window = L - k + 1
    counts = Counter()
    clumps = set()
    for start in range(n - k + 1):
        entering = genome[start:start + k]
        counts[entering] += 1
        if start >= starts_per_window:
            # Slide the window: drop the k-mer that just fell off the left.
            old = start - starts_per_window
            counts[genome[old:old + k]] -= 1
        # Only the entering k-mer's count can have risen at this step, so
        # it is the only one that can newly reach t. While the first
        # window is still filling, the counts cover a prefix of that
        # window, so reaching t there is already a valid clump.
        if counts[entering] >= t:
            clumps.add(entering)
    return list(clumps)
# NOTE: k, L and t are not referenced by the call below; the parameters
# actually used are parsed from the input file.
k = 11
L = 566
t = 18
# The input file is read as whitespace-separated tokens, taken in order as:
# genome, k, L, t.
with open('clump_finding_problem_text.txt') as handle:
    # The context manager closes the file once its contents are read.
    string = handle.read().split()
clumps = clump_finding_problem(string[0], int(string[1]), int(string[2]), int(string[3]))
# Emit the answer; a bare call at module level would silently discard it.
print(*clumps)