Skip to content

Commit

Permalink
change to unstandardized X
Browse files Browse the repository at this point in the history
  • Loading branch information
zouyuxin committed Nov 20, 2019
1 parent 117c2cc commit cf31322
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 60 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ analysis/*png
analysis/*html
analysis/*_cache
analysis/site_libs
data/*.XtX.Xty.rds
output/*.XtX.Xty.rds
1 change: 1 addition & 0 deletions analysis/_site.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ output:
toc_float: yes
theme: cosmo
highlight: textmate
code_folding: hide
134 changes: 83 additions & 51 deletions scripts/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,58 @@ scale_colour_discrete_gradient <- function(..., colours, bins = 5, na.value = "g
#' @param y.lim range of y axis
locus.zoom = function(z, pos, chr, gene.pos.map=NULL, z.ref.name=NULL, ld=NULL,
title = NULL, title.size = 10, true = NULL,
y.height=c(5,1.5), y.lim=NULL, xrange=NULL){
y.height=c(5,1.5), y.lim=NULL, y.type='logp',xrange=NULL){
if(is.null(xrange)){
xrange = c(min(pos), max(pos))
}
tmp = data.frame(POS = pos, p = -(pnorm(-abs(z), log.p = T) + log(2))/log(10))
pl_zoom = ggplot(tmp, aes(x = POS, y = p)) + geom_point(color = 'darkblue') +
ylab("-log10(p value)") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(),
plot.title = element_text(size=title.size))
tmp = data.frame(POS = pos, p = -(pnorm(-abs(z), log.p = T) + log(2))/log(10), z = z)

if(!is.null(ld) && !is.null(z.ref.name)){
tmp$ref = names(z) == z.ref.name
tmp$r2 = ld^2
pl_zoom = ggplot(tmp, aes(x = POS, y = p, shape = ref, size=ref, color=r2)) + geom_point() +
ylab("-log10(p value)") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
scale_color_gradientn(colors = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
values = seq(0,1,0.2), breaks=seq(0,1,0.2)) +
# scale_colour_discrete_gradient(
# colours = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
# limits = c(0, 1.01),
# breaks = c(0,0.2,0.4,0.6,0.8,1),
# guide = guide_colourbar(nbin = 100, raster = FALSE, frame.colour = "black", ticks.colour = NA)
# ) +
scale_shape_manual(values=c(20, 18), guide=FALSE) + scale_size_manual(values=c(2,5), guide=FALSE) +
theme_bw() + theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=15),axis.text = element_text(size=15),
plot.title = element_text(size=title.size))
if(y.type == 'logp'){
pl_zoom = ggplot(tmp, aes(x = POS, y = p, shape = ref, size=ref, color=r2)) + geom_point() +
ylab("-log10(p value)") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
scale_color_gradientn(colors = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
values = seq(0,1,0.2), breaks=seq(0,1,0.2)) +
# scale_colour_discrete_gradient(
# colours = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
# limits = c(0, 1.01),
# breaks = c(0,0.2,0.4,0.6,0.8,1),
# guide = guide_colourbar(nbin = 100, raster = FALSE, frame.colour = "black", ticks.colour = NA)
# ) +
scale_shape_manual(values=c(20, 18), guide=FALSE) + scale_size_manual(values=c(2,5), guide=FALSE) +
theme_bw() + theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=15),axis.text = element_text(size=15),
plot.title = element_text(size=title.size))
}else if(y.type == 'z'){
pl_zoom = ggplot(tmp, aes(x = POS, y = z, shape = ref, size=ref, color=r2)) + geom_point() +
ylab("z score") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
scale_color_gradientn(colors = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
values = seq(0,1,0.2), breaks=seq(0,1,0.2)) +
# scale_colour_discrete_gradient(
# colours = c("darkblue", "deepskyblue", "lightgreen", "orange", "red"),
# limits = c(0, 1.01),
# breaks = c(0,0.2,0.4,0.6,0.8,1),
# guide = guide_colourbar(nbin = 100, raster = FALSE, frame.colour = "black", ticks.colour = NA)
# ) +
scale_shape_manual(values=c(20, 18), guide=FALSE) + scale_size_manual(values=c(2,5), guide=FALSE) +
theme_bw() + theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=15),axis.text = element_text(size=15),
plot.title = element_text(size=title.size))
}
}else{
if(y.type == 'logp'){
pl_zoom = ggplot(tmp, aes(x = POS, y = p)) + geom_point(color = 'darkblue') +
ylab("-log10(p value)") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(),
plot.title = element_text(size=title.size))
}else if(y.type == 'z'){
pl_zoom = ggplot(tmp, aes(x = POS, y = z)) + geom_point(color = 'darkblue') +
ylab("z scores") + ggtitle(title) + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(),
plot.title = element_text(size=title.size))
}
}

if(!is.null(y.lim)){
Expand Down Expand Up @@ -140,48 +166,50 @@ locus.zoom = function(z, pos, chr, gene.pos.map=NULL, z.ref.name=NULL, ld=NULL,
#' @param title title of the plot
#' @param title.size the size of the title
#' @param true the true value
#' @param plotz whether to plot locuszoom plot
#' @param plot.locuszoom whether to plot locuszoom plot
#' @param y.lim range of y axis
#' @param y.susie the y axis of the SuSiE plot, 'PIP' or 'p', 'p' refers to -log10(p)
#' @param y.susie the y axis of the SuSiE plot, 'PIP' or 'p' or 'z', 'p' refers to -log10(p)
susie_plot_locuszoom = function(z, model, pos, chr, gene.pos.map = NULL, z.ref.name, ld,
title = NULL, title.size = 10, true = NULL,
plotz = TRUE, y.lim=NULL, y.susie='PIP', xrange=NULL){
plot.locuszoom = TRUE, y.lim=NULL, y.susie='PIP', xrange=NULL){
if(is.null(xrange)){
xrange = c(min(pos), max(pos))
}
if(plotz){
pl_zoom = locus.zoom(z, pos = pos, chr = chr, ld=ld, z.ref.name = z.ref.name, title = title, title.size = title.size, y.lim=y.lim, xrange=xrange)
if(plot.locuszoom){
if(y.susie == 'z'){
y.type = 'z'
}else{
y.type = 'logp'
}
pl_zoom = locus.zoom(z, pos = pos, chr = chr, ld=ld, z.ref.name = z.ref.name, title = title, title.size = title.size, y.lim=y.lim, y.type=y.type, xrange=xrange)
}
pip = model$pip
tmp = data.frame(POS = pos, PIP = pip, p = -(pnorm(-abs(z), log.p = T) + log(2))/log(10))
tmp = data.frame(POS = pos, PIP = pip, p = -(pnorm(-abs(z), log.p = T) + log(2))/log(10), z = z)
if(y.susie == 'PIP'){
if(plotz){
pl_susie = ggplot(tmp, aes(x = POS, y = PIP)) + geom_point(show.legend = FALSE, size=3) +
xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank())
}else{
pl_susie = ggplot(tmp, aes(x = POS, y = PIP)) + geom_point(show.legend = FALSE, size=3) +
xlim(xrange[1], xrange[2]) + ggtitle(title) +
theme_bw() + theme(axis.title.x=element_blank(),
axis.text = element_text(size=15),
axis.title.y = element_text(size=15),
plot.title = element_text(size=title.size))
pl_susie = ggplot(tmp, aes(x = POS, y = PIP)) + geom_point(show.legend = FALSE, size=3) +
xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.text = element_text(size=15),
axis.title.y = element_text(size=15))
if(!plot.locuszoom){
pl_susie = pl_susie + ggtitle(title) + theme(plot.title = element_text(size=title.size))
}
}else if(y.susie == 'p'){
if(plotz){
pl_susie = ggplot(tmp, aes(x = POS, y = p)) + geom_point(show.legend = FALSE, size=3) +
ylab("-log10(p value)") + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank())
pl_susie = ggplot(tmp, aes(x = POS, y = p)) + geom_point(show.legend = FALSE, size=3) +
ylab("-log10(p value)") + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.text = element_text(size=15),
axis.title.y = element_text(size=15))
if(!plot.locuszoom){
pl_susie = pl_susie + ggtitle(title) + theme(plot.title = element_text(size=title.size))
# pl_susie = pl_susie + geom_hline(yintercept=-log10(5e-08), linetype='dashed', color = 'red')
}else{
pl_susie = ggplot(tmp, aes(x = POS, y = p)) + geom_point(show.legend = FALSE, size=3) +
ylab("-log10(p value)") + xlim(xrange[1], xrange[2]) + ggtitle(title) +
theme_bw() + theme(axis.title.x=element_blank(),
axis.text = element_text(size=15),
axis.title.y = element_text(size=15),
plot.title = element_text(size=title.size))
}

}else if(y.susie == 'z'){
pl_susie = ggplot(tmp, aes(x = POS, y = z)) + geom_point(show.legend = FALSE, size=3) +
ylab("z scores") + xlim(xrange[1], xrange[2]) +
theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.text = element_text(size=15),
axis.title.y = element_text(size=15))
if(!plot.locuszoom){
pl_susie = pl_susie + ggtitle(title) + theme(plot.title = element_text(size=title.size))
}
}

if(!is.null(true)){
Expand Down Expand Up @@ -209,19 +237,23 @@ susie_plot_locuszoom = function(z, model, pos, chr, gene.pos.map = NULL, z.ref.n
pl_susie = pl_susie + geom_point(data=tmp.cs, aes(x=POS, y=p, color=CS),
shape=1, size=3, stroke=1.5) +
scale_color_manual(values=colors)
}else if(y.susie == 'z'){
pl_susie = pl_susie + geom_point(data=tmp.cs, aes(x=POS, y=z, color=CS),
shape=1, size=3, stroke=1.5) +
scale_color_manual(values=colors)
}
}

if(!is.null(gene.pos.map)){
pl_gene = plot_geneName(gene.pos.map, xrange = xrange, chr=chr)
if(plotz){
if(plot.locuszoom){
g = egg::ggarrange(pl_zoom, pl_susie, pl_gene, nrow=3, heights = c(4,4,1.5), draw=FALSE)
}else{
g = egg::ggarrange(pl_susie, pl_gene, nrow=2, heights = c(5.5,1.5), draw=FALSE)
}

}else{
if(plotz){
if(plot.locuszoom){
g = egg::ggarrange(pl_zoom, pl_susie, nrow=2, heights = c(4,4), draw=FALSE)
}else{
g = pl_susie
Expand Down
2 changes: 1 addition & 1 deletion scripts/prepare.region.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ module load gcc/6.2.0 R/3.5.0

zcat /gpfs/data/stephens-lab/finemap-uk-biobank/data/raw/height.csv.gz | awk 'BEGIN{FS=","} {print $1 OFS $1}' | tail -n +2 > height.id.txt

zcat /gpfs/data/pierce-lab/uk-biobank-genotypes/ukb_mfi_chr${regionchr}_v3.txt.gz | awk '($6 < 0.01 || $8 < 0.9){print $2}' > ukb_mfi_chr${regionchr}_v3_exclude_id.txt
zcat /gpfs/data/pierce-lab/uk-biobank-genotypes/ukb_mfi_chr${regionchr}_v3.txt.gz | awk '($8 < 0.9){print $2}' > ukb_mfi_chr${regionchr}_v3_exclude_id.txt

plink2 --sample /gpfs/data/xhe-lab/uk-biobank/data/genotypes/ukb27386_imp_v3_s487324.sample \
--bgen /gpfs/data/pierce-lab/uk-biobank-genotypes/ukb_imp_chr${regionchr}_v3.bgen \
Expand Down
12 changes: 4 additions & 8 deletions scripts/prepare.susieinput.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,15 @@ cols = which(colnames(Z) %in% c("age","pc_genetic1","pc_genetic2","pc_genetic3",
Z[,cols] = scale(Z[,cols])
Z[,'age2'] = Z[,'age']^2

print(dim(Z))

# Compute XtX and Xty
y = pheno$height
names(y) = pheno$id

# Center y
y = y - mean(y)
# Center scale X
cm = Matrix::colMeans(X, na.rm = TRUE)
csd = susieR:::compute_colSds(X)
csd[csd == 0] = 1
X = as.matrix(t((t(X) - cm) / csd))
# Center X
X = scale(X, center=TRUE, scale = FALSE)
xtxdiag = colSums(X^2)

A <- crossprod(Z) # Z'Z
# chol decomposition for (Z'Z)^(-1)
Expand All @@ -71,7 +67,7 @@ S = R %*% crossprod(Z, y) # RZ'y
# Load LD matrix from raw genotype
ld.matrix = as.matrix(fread(paste0('height.', region.name, '.matrix')))
# X'X
XtX = ld.matrix*(nrow(X)-1) - crossprod(W) # W'W = X'ZR'RZ'X = X'Z(Z'Z)^{-1}Z'X
XtX = sqrt(xtxdiag) * t(ld.matrix*sqrt(xtxdiag)) - crossprod(W) # W'W = X'ZR'RZ'X = X'Z(Z'Z)^{-1}Z'X
rownames(XtX) = colnames(XtX) = colnames(X)
# X'y
Xty = as.vector(y %*% X)
Expand Down

0 comments on commit cf31322

Please sign in to comment.