-
Notifications
You must be signed in to change notification settings - Fork 0
/
genome_annotations_prepare.py
55 lines (55 loc) · 2.53 KB
/
genome_annotations_prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Renamed the iterables: the previous names were leftovers from testing. :-)
#Gaurav Sablok
#Senior Postdoctoral Fellow Faculty of Natural and Agricultural Sciences
#Room 7-35, Agricultural Sciences Building
#University of Pretoria, Private Bag X20 Hatfield 0028,
#South Africa
from collections import Counter

import arguably
import pandas as pd
def _read_gene_ontologies(path):
    """Read the gene-ontology text file at *path*.

    Returns a pair ``(raw_lines, cleaned_lines)``: every line with the
    surrounding whitespace stripped, and the same lines with each ``;``
    removed.
    """
    # The handle gets its own name on purpose: rebinding the path variable
    # to the open handle made later code try to open() a file object.
    with open(path, "r", encoding="utf-8") as handle:
        raw_lines = [line.strip() for line in handle]
    cleaned_lines = [line.replace(";", "") for line in raw_lines]
    return raw_lines, cleaned_lines


def _count_terms(terms):
    """Return one single-entry ``{term: occurrences}`` dict per distinct term."""
    # Counter tallies in one pass and keeps first-seen order, so the result
    # is deterministic (set iteration order is not).
    return [{term: total} for term, total in Counter(terms).items()]


def _prompt_annotation_source():
    """Prompt on stdin until both a file path and a column name are given."""
    while True:
        path = input("Please enter the path for the annotation file").strip()
        column = input("Please enter the annotation columns").strip()
        # Both answers are required; ask again if either one is blank.
        if path and column:
            return path, column


def _summarize_annotations(path, column):
    """Load *column* from the CSV at *path* and tally its ``;``-separated terms.

    Returns ``(terms, term_counts, names, frame)`` where ``terms`` is the
    flat list of all terms, ``term_counts`` is a list of single-entry
    ``{term: count}`` dicts, ``names`` lists the distinct terms, and
    ``frame`` is a DataFrame with one ``(count, name)`` row per distinct term.
    """
    table = pd.read_csv(path, sep=",")
    # astype(str) keeps split() working if pandas parsed the column as numeric.
    cells = table[column].dropna().astype(str).tolist()
    terms = [term for cell in cells for term in cell.split(";")]
    tally = Counter(terms)
    term_counts = [{term: total} for term, total in tally.items()]
    names = list(tally)
    # Names and counts come from the same tally, so every row is paired
    # correctly.
    frame = pd.DataFrame([(total, term) for term, total in tally.items()])
    return terms, term_counts, names, frame


@arguably.command
def prepareGeneEnrichment(file=False, count=False, annotation=False):
    """
    Prepare gene-ontology data for functional enrichment analysis.

    A parser to prepare the files for the functional enrichment of the
    gene categories across the functionally annotated microbiome.

    Args:
        file: path to a text file containing your gene ontologies, one
            entry per line. Leave unset to skip this step.
        count: when set together with file, also tally how often each
            cleaned entry occurs.
        annotation: when set, interactively ask for an annotation CSV
            path and the column to summarize.

    Returns:
        With file and count: a tuple of the stripped lines, the lines
        with semicolons removed, and a list of single-entry dicts mapping
        each distinct cleaned line to its number of occurrences.
        With file only: a tuple of the stripped lines and the cleaned lines.
        With annotation and no count: a tuple of all split terms, a list of
        single-entry dicts mapping each term to its count, the distinct
        term names, and a DataFrame of count and name rows.
        Otherwise None.

    Raises:
        OSError: if the gene-ontology or annotation file cannot be opened.
        KeyError: if the requested annotation column is not in the CSV.
    """
    if file:
        go_content, go_summarize = _read_gene_ontologies(file)
        if count:
            return go_content, go_summarize, _count_terms(go_summarize)
        if not annotation:
            return go_content, go_summarize
        # file + annotation without count: fall through to the annotation
        # summary, which is what this combination has always selected.
    if annotation:
        annotation_path, annotation_column = _prompt_annotation_source()
        return _summarize_annotations(annotation_path, annotation_column)
    return None
# Script entry point: only dispatch to the arguably CLI when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    arguably.run()