forked from mateushg1/CRGGH
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExtract_dosages_09092022.py
120 lines (89 loc) · 4.07 KB
/
Extract_dosages_09092022.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import argparse
parser = argparse.ArgumentParser(description="Dosage de Local Ancestry")
required = parser.add_argument_group("Mandatory arguments")
required.add_argument('-t', '--tsv', help = "TSV from RFMix with RFMix2 template", required = True)
required.add_argument('-a', '--anc', help = "Ancestry to calculate dosage", required = True)
required.add_argument('-o', '--output', help = "Output file", required = True)
required.add_argument('-P', '--pos_list', help = "SNP positions", required = True, default='')
optional = parser.add_argument_group("Optional arguments")
#optional.add_argument('-P', '--pos_list', help = "SNP positions", required = False, default='')
args = parser.parse_args()
tsvFile = args.tsv
anc = args.anc
outputFile = args.output
position_list = args.pos_list
file1 = open(tsvFile)
countLine = 0
indList = []
dictPosteriori = {}
for line in file1:
if countLine == 1:
headerLine = line.strip().split("\t")
elif countLine > 1:
split = line.strip().split("\t")
pos = int(split[1])
for i in range(4, len(split)):
splitHeader = headerLine[i].split(":::")
if anc == splitHeader[2]:
if pos not in dictPosteriori:
dictPosteriori[pos] = {}
if splitHeader[0] not in dictPosteriori[pos]:
if splitHeader[0] not in indList:
indList.append(splitHeader[0])
dictPosteriori[pos][splitHeader[0]] = 0.0
dictPosteriori[pos][splitHeader[0]] = dictPosteriori[pos][splitHeader[0]] + float(split[i])
countLine = countLine + 1
fileOut_covar = open(f"{outputFile}.covar", "w")
fileOut_psam = open(f"{outputFile}.psam", "w")
fileOut_pvar = open(f"{outputFile}.pvar", "w")
countline_file2 = 0
if position_list != '':
file2 = open(position_list)
keysInt = []
for key in dictPosteriori:
keysInt.append(key)
keysInt.sort()
fileOut_pvar.write(f"#CHROM\tPOS\tID\n")
for line_file2 in file2:
row = line_file2.strip().split("\t")
posInt = int(row[1])
#print(f"usei {posInt} ")
#fileOut_pvar.write(f"{row[0]}\t{row[1]}\t{row[2]}\n")
for i in range(len(keysInt)):
if i == 0 and countline_file2 == 0:
fileOut_psam.write(f"#IDD\n")
for ind in dictPosteriori[keysInt[i]]:
fileOut_psam.write(f"{ind}\n")
if i == 0:
if posInt < keysInt[i]:
#print(f"A posicao {posInt} não existe")
break
if i + 1 != len(keysInt):
if posInt >= keysInt[i] and posInt < keysInt[i + 1]:
fileOut_pvar.write(f"{row[0]}\t{row[1]}\t{row[2]}\n")
#print(f"Imprimindo o {keysInt[i]}")
countind = 1
for ind in dictPosteriori[keysInt[i]]:
if countind < len(indList):
fileOut_covar.write(f"{dictPosteriori[keysInt[i]][ind]}\t")
else:
fileOut_covar.write(f"{dictPosteriori[keysInt[i]][ind]}\n")
countind = countind + 1
#fileOut.write(f"\n")
break
else:
countind = 1
if posInt >= keysInt[i]:
fileOut_pvar.write(f"{row[0]}\t{row[1]}\t{row[2]}\n")
#print(f"Imprimindo o {keysInt[i]}")
for ind in dictPosteriori[keysInt[i]]:
if countind < len(indList):
fileOut_covar.write(f"{dictPosteriori[keysInt[i]][ind]}\t")
else:
fileOut_covar.write(f"{dictPosteriori[keysInt[i]][ind]}\n")
countind = countind + 1
break
countline_file2 = countline_file2 + 1
fileOut_covar.close()
fileOut_psam.close()
fileOut_pvar.close()