Skip to content

Commit 4ffd97f

Browse files
committed
add meta analysis example scripts
1 parent 797bd1f commit 4ffd97f

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
library(rmeta)
2+
3+
meta_sldsc <- function (myrow,dir,analysis,mytraits=traits,mysd=""){
4+
nbtrait = length(mytraits)
5+
M = 5961159
6+
res = NULL
7+
for (t in mytraits){
8+
data = read.table(paste(dir,"/",t,".",analysis,".results",sep=""),h=T)[myrow,]
9+
log = read.table(paste(dir,"/",t,".",analysis,".log",sep=""),h=F,fill=T)
10+
h2g = as.numeric(as.character(log[which(log$V4=="h2:"),5]))
11+
#
12+
myenrstat = (h2g/M)*((data$Prop._h2/data$Prop._SNPs)-(1-data$Prop._h2)/(1-data$Prop._SNPs)) #step 1
13+
myenrstat_z = qnorm(data$Enrichment_p/2) #step2
14+
myenrstat_sd = myenrstat/myenrstat_z #step3
15+
data = cbind(data, myenrstat, myenrstat_sd)
16+
#
17+
if (mysd==""){ #particular case of binary annotation, where sd=sqrt(p(1-p))
18+
data$Coefficient = M * sqrt(data$Prop._SNPs*(1-data$Prop._SNPs)) * data$Coefficient / h2g
19+
data$Coefficient_std_error = M * sqrt(data$Prop._SNPs*(1-data$Prop._SNPs)) * data$Coefficient_std_error / h2g
20+
} else {
21+
data$Coefficient = M * mysd * data$Coefficient / h2g
22+
data$Coefficient_std_error = M * mysd * data$Coefficient_std_error / h2g
23+
}
24+
#
25+
res = rbind(res,data)
26+
}
27+
res = data.frame(res)
28+
test0 = meta.summaries(res$Prop._h2 , res$Prop._h2_std_error , method="random")
29+
test1 = meta.summaries(res$Enrichment , res$Enrichment_std_error , method="random")
30+
test2 = meta.summaries(res$myenrstat , res$myenrstat_sd , method="random")
31+
test3 = meta.summaries(res$Coefficient , res$Coefficient_std_error , method="random")
32+
out = rbind(c(mean(res$Prop._SNPs),test0$summary, test0$se.summary, test1$summary, test1$se.summary, 2*pnorm(-abs(test2$summary/test2$se.summary)), test3$summary, test3$se.summary, 2*pnorm(-abs(test3$summary/test3$se.summary))))
33+
colnames(out) = c("propsnps","proph2","proph2_se","Enr","Enr_se","Enr_P","tau","tau_se","tau_P")
34+
rownames(out) = data$Category[1]
35+
out
36+
}
37+
38+
meta_sldsc(%d, "%s", "baselineLD_mammal", c(%s))
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/usr/bin/env python
2+
3+
import gzip
4+
import glob
5+
import sys
6+
import os
7+
import tempfile
8+
import subprocess
9+
import math
10+
11+
from joblib import Parallel, delayed
12+
13+
import matplotlib
14+
matplotlib.use('Agg')
15+
import matplotlib.pyplot
16+
import matplotlib.font_manager
17+
18+
def dformat(xxl):
19+
x = xxl if type(xxl) is str else xxl[0]
20+
xx = xxl if type(xxl) is str else xxl[0]
21+
with open(x, 'rt') as f:
22+
if "LD Score Regression (LDSC)" not in f.read(): return
23+
with open(x, 'rt') as f:
24+
lines = [ x.strip() for x in f.read().split("\n") ]
25+
lines = [ x if "/tmp" not in x else os.path.basename(xx).replace(".5", "_5").split('.')[-3] + "\t" + "\t".join(x.strip().split()[1:]) for x in lines ]
26+
if type(xxl) is list:
27+
def v(xx):
28+
print(xx, file = sys.stderr)
29+
with open(xx, 'rt') as f:
30+
llines = [ x.strip() for x in f.read().split("\n") ][-3:]
31+
llines = [ os.path.basename(xx).replace(".5", "_5").split('.')[-3] + "\t" + "\t".join(x.strip().split()[1:]) for x in llines if "/tmp" in x ]
32+
return llines
33+
r = Parallel(n_jobs = 64)(delayed(v)(xx) for xx in xxl[1:])
34+
for xx in r: lines += xx
35+
lidx = [ i for i, x in enumerate(lines) if x.startswith("Category") ][0]
36+
with open(os.path.dirname(x) + "/formatted-h2/" + os.path.basename(x).replace("460K.", "460K_").split('.')[0] + ".results", 'w') as o:
37+
o.write('\n'.join(lines[lidx:]))
38+
with open(os.path.dirname(x) + "/formatted-h2/" + os.path.basename(x).replace("460K.", "460K_").split('.')[0] + ".log", 'w') as o:
39+
o.write('\n'.join(lines[:lidx]))
40+
41+
def run(x, d):
42+
with tempfile.NamedTemporaryFile('wt') as o:
43+
with open("formatted-h2.R", 'r') as f:
44+
o.write(f.read() % (x, d, ", ".join([ '"%s"' % os.path.basename(x).split('.')[0] for x in glob.glob(d + "/*.result*") ])))
45+
o.flush()
46+
return subprocess.check_output("Rscript %s" % o.name, shell = True).decode()
47+
48+
def kv(vx):
49+
r = {}
50+
lines = vx.strip().split("\n")
51+
for i in range(int(len(lines) / 2)):
52+
keys = [ x for x in lines[i * 2].strip().split() ]
53+
for ii, k in enumerate(keys):
54+
r[k] = float(lines[i * 2 + 1].strip().split()[ii + 1])
55+
return lines[1].strip().split()[0], r
56+
57+
def trun(x, d):
58+
try:
59+
return kv(run(x, d))
60+
except:
61+
return None
62+
63+
def percentages(inputf):
64+
common = glob.glob("common_snps/*common*bed")
65+
def read_c(cc):
66+
with open(cc, 'r') as f:
67+
return { x.strip().split()[-1] for x in f }
68+
r = Parallel(n_jobs = 64)(delayed(read_c)(x) for x in common)
69+
all_common = set()
70+
for x in r:
71+
all_common = all_common.union(x)
72+
def totals(iif):
73+
count = 0
74+
with gzip.open(iif, 'rt') as ff:
75+
hmap = { i: x.strip() for i, x in enumerate(ff.readline().split("\t")) }
76+
counts = { x: 0 for _, x in hmap.items() }
77+
for line in ff:
78+
if line.strip().split()[2] not in all_common: continue
79+
count += 1
80+
for i, x in enumerate(line.strip().split()):
81+
counts[hmap[i]] += 1 if x == "1" else 0
82+
return count, counts
83+
results = Parallel(n_jobs = 64)(delayed(totals)(x) for x in inputf)
84+
gtotal = sum([ x[0] for x in results ])
85+
return { k.strip().replace('[', "").replace(']', "").replace(',', ""): sum([ x[1][k] if len(x) >= 2 and k in x[1] else 0 for x in results ]) / float(gtotal) for k, _ in results[0][1].items() }
86+
87+
def main(argc, argv):
88+
89+
if argc < 3:
90+
print("usage: meta-analysis.py ldsc-results-directory *.annot.gz", file = sys.stderr
91+
return 1
92+
93+
os.system("mkdir -p %s" % (argv[1] + "/formatted-h2"))
94+
for x in glob.glob(argv[1] + "/*"):
95+
dformat(x)
96+
values = Parallel(n_jobs = 64)(delayed(trun)(x, argv[1] + "/formatted-h2") for x in range(2, 150))
97+
98+
cmap = { x[0]: x[1] for x in values if x is not None }
99+
p = percentages(argv[2:])
100+
for k, v in cmap.items():
101+
print("%s\t%f\t%f\t%f\t%e\t%f\t%f\t%e" % (k, p[k.split("L2")[0]], v["Enr"], v["Enr_se"], v["Enr_P"], v["tau"], v["tau_se"], v["tau_P"]))
102+
return 0
103+
104+
if __name__ == "__main__":
105+
sys.exit(main(len(sys.argv), sys.argv))

0 commit comments

Comments
 (0)