-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_bloodcells_zscores.R
74 lines (57 loc) · 2.75 KB
/
get_bloodcells_zscores.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
arg = commandArgs(trailingOnly = TRUE)
chr = as.numeric(arg[1])
start = as.numeric(arg[2])
end = as.numeric(arg[3])
suppressMessages(library(data.table))
suppressMessages(library(dplyr))
suppressMessages(library(Matrix))
gwas_dir = '/gpfs/data/stephens-lab/finemap-uk-biobank/data/raw/BloodCells/gwas_maf001_info6/'
genotype_dir = '/gpfs/data/stephens-lab/finemap-uk-biobank/data/raw/BloodCells/regions_genotype_maf001_info6/'
output_dir = '/gpfs/data/stephens-lab/finemap-uk-biobank/data/raw/BloodCells/regions_zscores_maf001_info6/'
pheno_names = c("WBC_count", "RBC_count", "Haemoglobin", "MCV", "RDW", "Platelet_count",
"Plateletcrit", "PDW", "Lymphocyte_perc", "Monocyte_perc",
"Neutrophill_perc", "Eosinophill_perc", "Basophill_perc",
"Reticulocyte_perc", "MSCV", "HLR_perc")
zscores = c()
for(trait in pheno_names){
gwas = fread(paste0(gwas_dir,'bloodcells_gwas_chr', chr, '.', trait, '.glm.linear'))
colnames(gwas)[1] = 'CHR'
dt = gwas %>% filter(CHR == chr, POS >= start, POS <= end) %>% select(CHR, POS, ID, REF, ALT, T_STAT) %>%
rename(!!trait := T_STAT)
if(is.null(nrow(zscores))){
zscores = dt
}else{
zscores = inner_join(zscores, dt, by=c('ID', 'CHR', 'POS', 'REF', 'ALT'))
}
}
pos = zscores %>% select(CHR, POS, ID, REF, ALT)
zscores = zscores %>% select(-CHR, -POS, -ID, -REF, -ALT)
geno.file = paste0(genotype_dir, 'bloodcells_chr',
chr, '.', start, '.', end, '.raw.gz')
cat("Reading genotype data.\n")
geno <- suppressMessages(fread(geno.file,sep = "\t",header = TRUE,stringsAsFactors = FALSE))
class(geno) <- "data.frame"
# Extract the genotypes.
X <- as(as.matrix(geno[-(1:6)]),'dgCMatrix')
# X <- as.matrix(geno[-(1:6)])
pheno.file <- "/gpfs/data/stephens-lab/finemap-uk-biobank/data/raw/BloodCells/bloodcells.pheno.txt"
cat("Reading phenotype data.\n")
pheno <- suppressMessages(fread(pheno.file))
class(pheno) <- "data.frame"
ind = fread(paste0(genotype_dir, 'bloodcells_chr',
chr, '.', start, '.', end, '.psam'))
match.idx = match(ind$IID, pheno$IID)
pheno = pheno[match.idx,]
Y = pheno %>% select(-FID, -IID) %>% as.matrix
# centering
Y = t(t(Y) - colMeans(Y))
XtY = as.matrix(Matrix::crossprod(X, Y))
X.cm = colMeans(X)
xtxdiag = colSums(X^2) - nrow(X) * X.cm^2
maf <- suppressMessages(read.delim(paste0(genotype_dir, 'bloodcells_chr',
chr, '.', start, '.', end, '.afreq')))
maf = maf %>% mutate(maf = pmin(ALT_FREQS, 1-ALT_FREQS)) %>% select(ID, maf)
pos = inner_join(pos, maf, by='ID')
saveRDS(list(LD = paste0('bloodcells_chr', chr, '.', start, '.', end,'.matrix'),
XtY = XtY, n = nrow(Y), pos=pos, Z = zscores, XtXD = xtxdiag),
paste0(output_dir, 'bloodcells_chr',chr, '.', start, '.', end, '.z.rds'))