diff --git a/Fig1_code/Fig1.R b/Fig1_code/Fig1.R new file mode 100644 index 0000000..5083970 --- /dev/null +++ b/Fig1_code/Fig1.R @@ -0,0 +1,126 @@ + +####### +setwd('/work3/maize_eGWAS/figures/Fig1_code/') +suppressPackageStartupMessages({ + library(ggpubr) + library(cowplot) +}) + +npg<-ggsci::pal_npg('nrc')(10) +hb<-read.csv('e340_Heritability.csv',row.names = 1) + +#### Hb histgram +p1<-ggplot(hb,aes(Hb)) + + stat_bin(breaks=seq(0,1,0.05),fill='gray',color=1) + + theme_classic(12) + + theme(axis.text = element_text(colour = 'black'), + axis.ticks = element_line(colour = 'black')) + + xlab(bquote(H^2)) + ylab('Number of genes') + + scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) + + scale_y_continuous(expand = expansion(c(0,0.05))) + +#### Hb 2D density +p2<-ggplot(hb,aes(log2GEMean,Hb)) + + stat_density_2d(geom='polygon',aes(fill = after_stat(level)), + breaks=seq(0,1,0.05)) + + scale_fill_distiller(palette=12, direction=1) + + xlab(bquote(log[2](FPKM))) + ylab(bquote(H^2)) + + theme_classic(12) + + theme(legend.position = c(0.9,0.8),legend.key.size = unit(12,'point'), + axis.text = element_text(colour = 'black'), + axis.ticks = element_line(colour = 'black')) + + scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,NA)) + + scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) + + +#### Volcano plot +library(EnhancedVolcano) +vol <- read.csv('Hb_GO_redundancyReduced.csv',row.names=1) +head(vol) +max(vol[which(vol$FDR<=0.05),"pValue"]) + +BP = vol$desc[vol$Category =='Biological process'] +CC = vol$desc[vol$Category =='Cellular component'] +MF = vol$desc[vol$Category =='Molecular function'] +celltype1 <- BP +celltype2 <- MF +keyvals.shape <- ifelse(vol$desc %in% celltype1, 17, + ifelse(vol$desc %in% celltype2, 19, 18)) + +keyvals.shape[is.na(keyvals.shape)] <-18 +names(keyvals.shape)[keyvals.shape == 18] <- 'Cellular component' +names(keyvals.shape)[keyvals.shape == 17] <- 'Biological process' +names(keyvals.shape)[keyvals.shape == 19] <- 'Molecular function' +volSig = subset(vol, abs(log2FoldChange)>log2(1.2) & FDR <=0.05) # & Category != 'Cellular component') +rownames(volSig) + +p3 <- EnhancedVolcano(vol,lab = vol$desc,x = 'log2FoldChange',y = 'pValue', + shapeCustom = keyvals.shape, + pCutoff = 4.94e-05, FCcutoff = log2(1.1), colAlpha = .7, + ylim = c(0, 12), xlim = c(-0.8,0.8), + pointSize = 2, + labSize = 2, + caption = 'total = 981 GO terms', + gridlines.major = T,gridlines.minor = F, + title = NULL, subtitle = NULL, + xlab = bquote(log[2]("fold change")), + ylab = bquote(-log[10](italic(P))), + borderWidth = 1.5, + legendLabSize=9, + legendIconSize = 2.5, + drawConnectors = F) + + theme_classic(12) + + scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,12)) + + guides(color = FALSE) + xlim(-1,0.6) + + theme(legend.position = 'top', legend.title = element_blank(), + legend.text = element_text(size=9), + axis.text = element_text(colour = 'black'), + axis.ticks = element_line(colour = 'black')) + +#### Hb_Trop_VS_Temperate +df <- read.csv('Hb_Trop_VS_Temperate.csv',row.names = 1) +df <- subset(df, HbTemp >0 & HbTrop > 0) + +p4<-ggplot(data = df, aes(HbTemp, HbTrop)) + + stat_density_2d(aes(fill = ..level..), geom='polygon') + + # geom_smooth(position = 'identity', method = lm) + + scale_fill_distiller(palette=12, direction=1) + + theme(legend.position='right')+ + xlim(0,1) + ylim(0,1) + + xlab(bquote(H^2~"Tropical")) + + ylab(bquote(H^2~"Temperate")) + + theme_classic(12) +# coef <- round(coef(lm(HbTemp~HbTrop, df)), 2) +# p4 <- p4 + annotate("text", x=0.05, y= 0.95, hjust=0, col=2, size=4, fontface="italic", +# label=substitute(y==a*x+b, list(a=coef[2], b=coef[1]))) + +# theme(legend.position = c(0.9,0.3),legend.key.size = unit(12,'point'), +# axis.text = element_text(colour = 'black'), +# axis.ticks = element_line(colour = 'black')) + +# scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) + +# scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) +p4 + +p5<-ggplot(data.frame(x=c(df$HbTemp, df$HbTrop), + y=rep(c("Temperate","Tropical"),each=nrow(df))), + aes(x, fill=y, color=y)) + + geom_density(alpha=0.3,lwd=1,show.legend = T) + + theme_classic(12) + + labs(x=bquote(H^2), y='Density') + + scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) + + scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,NA)) + + theme(legend.position = c(0.8,0.9),legend.title = element_blank(), + legend.key.size = unit(12,'point'), legend.text = element_text(size=8), + axis.text = element_text(colour = 'black'), + axis.ticks = element_line(colour = 'black')) + +#### +pc1 <- plot_grid(p1,p2,align = 'v',ncol=1,labels = letters[1:2]) +pc2 <- plot_grid(p3,labels=letters[3]) +pc3 <- plot_grid(p4,p5,align = 'v',ncol=1,labels = letters[4:5]) + +pdf(width = 10,height = 6, file='../Fig1.pdf') +plot_grid(pc1,pc2,pc3,nrow=1,rel_widths = c(1,1.5)) +dev.off() + + + \ No newline at end of file diff --git a/Fig1_code/Heritability.R b/Fig1_code/Heritability.R new file mode 100644 index 0000000..a98f2c8 --- /dev/null +++ b/Fig1_code/Heritability.R @@ -0,0 +1,240 @@ +#factor: +options(stringsAsFactors = FALSE) +rm(list=ls()) +gc() +library('lme4') +library('parallel') + +################### Heritability for Tropical lines ############################ +dfTrop <- read.csv('e340_trop_repUnmerged_Untransformed_fpkm.csv', header= TRUE, + check.names = FALSE, row.names = 1) + + +###### only use duplicated genotypes ##### +cn = colnames(dfTrop) +idx = colnames(dfTrop) %in% colnames(dfTrop)[duplicated(colnames(dfTrop))] +dfTrop <- dfTrop[, idx] +colnames(dfTrop) = cn[idx] +length(unique(colnames(dfTrop))) +group <- factor(colnames(dfTrop)) +geneList = rownames(dfTrop) + +############### Run mixed linear model ############################ +HbTrop <- pbapply::pblapply(seq_along(geneList), function(i){ + trait = geneList[i] + gexp = as.numeric(dfTrop[trait, ]) + model = lmer(gexp ~ (1|group)) + varComp <- as.data.frame(VarCorr(model,comp='vcov')) + sigmas <- varComp$vcov + H2 <- sigmas[1]/sum(sigmas) + H2 +}, cl=32) + +################### Heritability for Temperate lines ########################### +dfTemp <- read.csv('e340_temp_repUnmerged_Untransformed_fpkm.csv', header= TRUE, check.names = FALSE, row.names = 1) +#################### only use duplicated genotypes ############################# +cn = colnames(dfTemp) +idx = colnames(dfTemp) %in% colnames(dfTemp)[duplicated(colnames(dfTemp))] +dfTemp <- dfTemp[, idx] +colnames(dfTemp) = cn[idx] +length(unique(colnames(dfTemp))) +group <- factor(colnames(dfTemp)) +geneList = rownames(dfTemp) +################## Run the mixed linear model ################################## +HbTemp <- pbapply::pblapply(seq_along(geneList), function(i){ + trait = geneList[i] + gexp = as.numeric(dfTemp[trait, ]) + model = lmer(gexp ~ (1|group)) + varComp <- as.data.frame(VarCorr(model,comp='vcov')) + sigmas <- varComp$vcov + H2 <- sigmas[1]/sum(sigmas) + H2 +}, cl=32) + +########################################### Combine ################################## +library(ggplot2) +library(grid) +library(MASS) +library(reshape2) +library(reshape) +HbTrop <- as.numeric(HbTrop) +HbTemp <- as.numeric(HbTemp) +dfHb_Comp = data.frame(HbTrop = HbTrop, HbTemp = HbTemp) +rownames(dfHb_Comp) <- geneList +write.csv(dfHb_Comp, file = 'Hb_Trop_VS_Temperate.csv') + +dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv', row.names = 1) +dfHbTempLow <- subset(dfHb_Comp, dfHb_Comp$HbTrop*0.8 >= dfHb_Comp$HbTemp) +write.csv(dfHbTempLow, file = 'TempHbLow.csv') +write.csv(rownames(dfHbTempLow), file = 'HbTempLow.list', row.names = F, quote = F) + + +#### plot the 2d density plot ####### +dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv', row.names = 1) +dfHb_Comp <- subset(dfHb_Comp, HbTemp >0 & HbTrop > 0) + +p1<- ggplot(data = dfHb_Comp, aes(HbTrop, HbTemp))+ + stat_density_2d(aes(fill = ..level..), geom='polygon')+ + geom_smooth(position = 'identity', method = lm)+ + scale_fill_distiller(palette=12, direction=1) + + theme(legend.position='right')+ + xlim(0,1) + ylim(0,1) + + xlab("H2 Tropical") + + ylab("H2 Temperate") + + theme_classic() + +coef <- round(coef(lm(HbTemp~HbTrop, dfHb_Comp)), 2) + +p1 = p1 + annotate("text", x=0.05, y= 1, hjust=0, col="red", size=4, fontface="italic", + label=substitute(y==a*x+b, list(a=coef[1], b=coef[2]))) +p1 + +########## distribution of the tropical H2 +dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv') +dfHb_trop_high <- subset(dfHb_Comp, HbTrop > HbTemp) +dfHb_trop_low <- subset(dfHb_Comp, HbTrop < HbTemp) +dfHb_trop_high$comparison = 'TropicalHigh' +dfHb_trop_low$comparison = 'TropicalLow' +dfHb_Comp_merge = rbind(dfHb_trop_high, dfHb_trop_low) + +ggplot(data = dfHb_Comp_merge, aes(HbTrop, fill = comparison)) + + geom_density(alpha=0.4) + + theme_classic() + +###################################################### +################### Heritability for all 340 lines ############################ +dfexp <- read.csv('eGWAS340_repUnmerged_Untransformed_fpkm.csv', header= TRUE, check.names = FALSE, row.names = 1) +cn = colnames(dfexp) +idx = colnames(dfexp) %in% colnames(dfexp)[duplicated(colnames(dfexp))] +dfexp <- dfexp[, idx] +colnames(dfexp) = cn[idx] +length(unique(colnames(dfexp))) + +group <- factor(colnames(dfexp)) +gtList = unique(colnames(dfexp)) +geneList = rownames(dfexp) +HbOutput <- pbapply::pblapply(seq_along(geneList), function(i){ + trait = geneList[i] + gexp = as.numeric(dfexp[trait, ]) + model = lmer(gexp ~ (1|group)) + varComp <- as.data.frame(VarCorr(model,comp='vcov')) + sigmas <- varComp$vcov + H2 <- sigmas[1]/sum(sigmas) + H2 +},cl=32) + +Hb = unlist(HbOutput) +gexpMeanlog <- log2(rowMeans(dfexp)) #[sapply(dfexp,is.numeric)])) +dfHb = data.frame(geneID = geneList, Hb = Hb, log2GEMean = gexpMeanlog) +write.csv(dfHb, file = 'e340_Heritability.csv') + +######################################################################################### +library(ggplot2) +library("ggpubr") +dfHb <- read.csv('e340_Heritability.csv', row.names = 1, header = T) +p1 <- ggplot(dfHb, aes(x=Hb)) + + geom_histogram(bins = 20, color='black', fill='grey') + + xlab(bquote(~H^2))+ + ylab('Number of genes') + + theme_bw()+ + theme(panel.border = element_blank(), panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +p1 +#p1 <- p1 + theme(axis.text.y = element_text(face = 'bold', size = 10, colour = 'black'), axis.text.x = element_text(face='bold', size = 10, colour = 'black')) +#p1 <- p1 + theme(axis.title.y = element_text(face = 'bold', size=10, colour = 'black'), title = element_text(face='bold', size=10, colour = 'black')) +#p1 <- p1 + theme(axis.ticks.length = unit(.25,"cm"), axis.ticks = element_line(size=1,colour = 'black')) + + +p2 <- ggplot(dfHb, aes(x=log2GEMean, y=Hb) ) + + stat_density_2d(aes(fill = ..level..), geom='polygon')+ + scale_fill_distiller(palette=12, direction=1) + + scale_x_continuous(expand = c(0, 0)) + + scale_y_continuous(expand = c(0, 0)) + + theme(legend.position='right')+ + xlab(bquote(~log[2](FPKM))) + + ylab(bquote(~H^2)) +p2 +#p2 <- p2 + theme(axis.text.y = element_text(face = 'bold', size = 10, colour = 'black'), axis.text.x = element_text(face='bold', size = 10, colour = 'black')) +#p2 <- p2 + theme(axis.title.y = element_text(face = 'bold', size=10, colour = 'black'), title = element_text(face='bold', size=10, colour = 'black')) +#p2 <- p2 + theme(axis.ticks.length = unit(.15,"cm"), axis.ticks = element_line(size=0.5,colour = 'black')) +library(ggrepel) +library(gridExtra) +grid.arrange(p1,p2, nrow=1) + + +#################### Narrow sense heritability (use sommer) ################# +rm(list=ls()) +options(stringsAsFactors = FALSE) +suppressPackageStartupMessages({ + library(data.table) + library(pbapply) + library(snpStats) + library(sommer) +}) + +# get duplicated individuals +dfexp <- fread("eGWAS340_repUnmerged_Untransformed_fpkm.csv",nrow=1) +#group <- unique(colnames(dfexp))[-1] +group <- unique(colnames(dfexp)[duplicated(colnames(dfexp))]) +length(group) + +# calculate additive relationship matrix +plink <- read.plink("../../geno340/snps_prune.bed") +snp <- as(plink$genotypes[plink$fam$pedigree %in% group, ], 'numeric') +A <- A.mat(snp - 1) +dim(A) + +# get gene expression matrix +f <- fread("eGWAS340_RepMerged_Untransformed_fpkm.csv", data.table = F) +f[1:5,1:5] +phe <- as.matrix(f[, match(group, colnames(f))]) +rownames(phe) <- f$V1 +traits <- rownames(phe) + +# calculate h2 +dfh2 <- pblapply(seq_along(traits), function(i){ + geneID <- traits[i] + df2 <- data.frame(lineName=group, gexp=phe[geneID, ]) + q <-mmer(gexp ~ 1, random = ~vsr(lineName, Gu=A), + rcov = ~units, data = df2, tolParInv = 1e-3, + verbose = FALSE) + a <- vpredict(q, h2 ~ V1/(V1 + V2) ) + a$Estimate +}, cl = 32) + +dfh2 <- data.frame(geneID = traits, h2 = unlist(dfh2)) +write.csv(dfh2, file='e340_narrow_hb.csv') + +###### +dfh2 <- read.csv(file = "e340_narrow_hb.csv", row.names = 1) +dfHb <- read.csv(file = 'e340_Heritability.csv', row.names = 1) +dfHb <- subset(dfHb, Hb < 1 & Hb > 0) +dfh2 <- subset(dfh2, h2 < 1 & h2 > 0) + +dfh = merge(dfh2, dfHb, by = 'geneID') +head(dfh) + + +library(ggplot2) + +p1 <- ggplot(dfh, aes(x=Hb, y = h2))+ + stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white") + + geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) + + xlab('Broad sense heritability') + + ylab('Narrow sense heritability') + + theme_classic() +p1 + +p2 <- ggplot(dfh, aes(x=Hb, y = h2)) + + geom_point() + + geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) + + xlab('Broad sense heritability') + + ylab('Narrow sense heritability') + + theme_classic() +p2 + + + + + + diff --git a/Fig1_code/broad_narrow_Hb.R b/Fig1_code/broad_narrow_Hb.R new file mode 100644 index 0000000..d3cdf05 --- /dev/null +++ b/Fig1_code/broad_narrow_Hb.R @@ -0,0 +1,140 @@ + +################### Hb or all lines ############################ +options(stringsAsFactors = FALSE) +suppressPackageStartupMessages({ + library(data.table) + library(pbapply) + library('lme4') +}) + +dfexp <- fread('eGWAS340_repUnmerged_Untransformed_fpkm.csv') +dfexp[1:5,1:5] +group <- unique(colnames(dfexp))[-1] +#group <- unique(colnames(dfexp)[duplicated(colnames(dfexp))]) +length(group) + +geneList <- dfexp$V1 +gt <- factor(colnames(dfexp)[colnames(dfexp) %in% group]) +dfexp <- as.matrix(subset(dfexp, select = colnames(dfexp) %in% group)) +rownames(dfexp) <- geneList +dim(dfexp) + +HbOutput <- pblapply(seq_along(geneList), function(i){ + df1 <- data.frame(gexp = dfexp[i, ], gt = gt) + model = lmer(gexp ~ (1|gt), data = df1) + sigmas <- as.data.frame(VarCorr(model))[,"vcov"] + H2 <- sigmas[1]/sum(sigmas) + H2 +},cl=32) + +Hb <- unlist(HbOutput) +gexpMeanlog <- log2(rowMeans(dfexp)) +dfHb <- data.frame(geneID = geneList, Hb = Hb, log2GEMean = gexpMeanlog) +write.csv(dfHb, file = 'e219_Hb.csv') + +#### tropical lines +trop <- scan("../../sweep/tropical.txt", what='') +trop_gt <- droplevels(gt[gt%in%trop]) +trop_exp <- dfexp[, gt%in%trop] +dim(trop_exp) +tropHb <- pblapply(seq_along(geneList), function(i){ + df1 <- data.frame(gexp = trop_exp[i, ], gt = trop_gt) + model = lmer(gexp ~ (1|gt), data = df1) + sigmas <- as.data.frame(VarCorr(model))[,"vcov"] + H2 <- sigmas[1]/sum(sigmas) + H2 +},cl=32) + +tropHb <- data.frame(geneID=geneList, Hb=unlist(tropHb)) +write.csv(tropHb, file = 'trop22_Hb.csv') + +#### tropical lines +temp <- scan("../../sweep/temperate.txt", what='') +temp_gt <- droplevels(gt[gt%in%temp]) +temp_exp <- dfexp[, gt%in%temp] +tempHb <- pblapply(seq_along(geneList), function(i){ + df1 <- data.frame(gexp = temp_exp[i, ], gt = temp_gt) + model = lmer(gexp ~ (1|gt), data = df1) + sigmas <- as.data.frame(VarCorr(model))[,"vcov"] + H2 <- sigmas[1]/sum(sigmas) + H2 +},cl=32) +tempHb <- data.frame(geneID=geneList, Hb=unlist(tempHb)) +write.csv(tempHb, file = 'temp39_Hb.csv') + +#################### Narrow sense heritability (use sommer) ################# +rm(list=ls()) +options(stringsAsFactors = FALSE) +suppressPackageStartupMessages({ + library(data.table) + library(pbapply) + library(snpStats) + library(sommer) +}) + +# get duplicated individuals +dfexp <- fread("eGWAS340_repUnmerged_Untransformed_fpkm.csv",nrow=1) +#group <- unique(colnames(dfexp))[-1] +group <- unique(colnames(dfexp)[duplicated(colnames(dfexp))]) +length(group) + +# calculate additive relationship matrix +plink <- read.plink("../../geno340/snps_prune.bed") +snp <- as(plink$genotypes[plink$fam$pedigree %in% group, ], 'numeric') +A <- A.mat(snp - 1) +dim(A) + +# get gene expression matrix +f <- fread("eGWAS340_RepMerged_Untransformed_fpkm.csv", data.table = F) +f[1:5,1:5] +phe <- as.matrix(f[, match(group, colnames(f))]) +rownames(phe) <- f$V1 +traits <- rownames(phe) + +# calculate h2 +dfh2 <- pblapply(seq_along(traits), function(i){ + geneID <- traits[i] + df2 <- data.frame(lineName=group, gexp=phe[geneID, ]) + q <-mmer(gexp ~ 1, random = ~vsr(lineName, Gu=A), + rcov = ~units, data = df2, tolParInv = 1e-3, + verbose = FALSE) + a <- vpredict(q, h2 ~ V1/(V1 + V2) ) + a$Estimate +}, cl = 32) + +dfh2 <- data.frame(geneID = traits, h2 = unlist(dfh2)) +write.csv(dfh2, file='e219_h2.csv') + +###################### plot ############################ +library(ggplot2) +x <- read.csv("e219_Hb.csv", row.names = 1) +y <- read.csv('e219_h2.csv', row.names = 1) + +x <- subset(x, Hb > 0 & Hb < 1) +y <- subset(y, h2 > 0 & h2 < 1) + +dfh = merge(x, y, by = 'geneID') +head(dfh) + +p1 <- ggplot(dfh, aes(x= h2, y = Hb)) + ylim(0,1) + + stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white") + + geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) + + ylab('Broad sense heritability') + + xlab('Narrow sense heritability') + + theme_classic() +p1 + +p2 <- ggplot(dfh, aes(x= h2, y = Hb)) + + geom_point() + ylim(0,1) + + geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) + + ylab('Broad sense heritability') + + xlab('Narrow sense heritability') + + theme_classic() +p2 + +pdf(width = 12, height = 6, file='../Fig_Hb_vs_h2.pdf') +cowplot::plot_grid(p1 + geom_abline(slope = 1,intercept = 0, lty=2, lwd=1.2, color=2), + p2 + geom_abline(slope = 1,intercept = 0, lty=2, lwd=1.2, color=2), + labels = 'auto') +dev.off() + diff --git a/Fig2_code/Fig2.R b/Fig2_code/Fig2.R new file mode 100644 index 0000000..689074c --- /dev/null +++ b/Fig2_code/Fig2.R @@ -0,0 +1,246 @@ +#### +setwd("/work3/maize_eGWAS/figures/Fig2_code/") +suppressPackageStartupMessages({ + library(ggplotify) + library(ggplot2) + library(cowplot) + library(ggpubr) + library(data.table) + library(GenomicRanges) +}) +source('locuscompare.R') +npg<-ggsci::pal_npg('nrc')(10) +theme_set(theme_classic(12) + + theme( axis.ticks = element_line(colour = 1), + axis.text = element_text(colour = 1,size=rel(0.9)), + legend.text = element_text(size=rel(0.9)), + line=element_line(size = 0.5), + legend.background = element_blank())) + +#### Hb vs PVE +hb <- read.csv("../Fig1_code/e340_Heritability.csv", row.names = 1) +load("../../eGWAS340/eqtl_peak.rda") +freq <- table(eqtls$gene) +eqtls <- eqtls[gene%in%names(freq[freq<=10])] +eqtls$Hb <- hb$Hb[match(eqtls$gene, hb$geneID)] + +p1<-ggplot(eqtls,aes(x=Hb,y=PVE)) + geom_point(col='gray',cex=0.2) + + xlab(bquote(H^2)) + ylab("eQTL PVE") + + geom_abline(slope = 1,intercept = 0,col=npg[8],lty=2) + + scale_x_continuous(expand = expansion(c(0,0.05))) + + scale_y_continuous(expand = expansion(c(0,0.05))) + +#### PVE/Hb +hb <- hb[hb$geneID %in% eqtls$gene, ] +hb$PVE <- tapply(eqtls$PVE, + factor(eqtls$gene, levels = hb$geneID), + sum) +hb$prop <- hb$PVE/hb$Hb + +p2<-ggplot(hb,aes(prop)) + + geom_density(col=1,fill=1,lwd=1,alpha=0.1) + + xlab(bquote("eQTL_PVE/"*H^2)) + ylab("Density") + + scale_x_continuous(expand = expansion(c(0,0.05)), limits = c(0,1)) + + scale_y_continuous(expand = expansion(c(0,0.05))) + +#### sqtl_loc +load('../../psi340_sd0.01_miss0.05.rda') +cis2_int<-int[match(cis2$intron,int$id)] +d3<-ifelse(as.character(strand(cis2_int))=='+',cis2$pos-start(cis2_int), + end(cis2_int)-cis2$pos) +names(d3)<-cis2_int$id +d3[d3>=width(cis2_int)]<- (d3-width(cis2_int))[d3>=width(cis2_int)]+2000 +d3[d3>0 & d30 & d3=-2e3 & d3<=4e3] +y3<-table(findInterval(d3,seq(-2e3,4e3,100))) +y3<-y3[match(seq(60),names(y3))] +y3[is.na(y3)]<-0 + +p3<-ggplot(data.frame(x=seq(60),y=as.numeric(y3))) + + geom_line(aes(x=x,y=y),color=npg[2],lwd=0.8) + + geom_vline(xintercept = c(20,41),col=npg[4],lty=2) + + xlab('Distance to affected introns (kb)') + + ylab(bquote(Number~of~italic("cis")*"-sQTLs")) + + scale_x_continuous(breaks=c(0,10,20,41,51,61), + labels=c(-2,-1,"5'SS","3'SS",1,2)) + + annotate('text',x=30.5,y=0, label='<-intron->',col=npg[4],vjust=-0.5) + + theme(axis.text.x = element_text(color=c(1,1,npg[4],npg[4],1,1))) + + annotate('rect',xmin=20,xmax = 41,ymin=0,ymax = 350,fill=npg[4],alpha=0.1) + + scale_y_continuous(expand = expansion(c(0,0.05))) + +#### tss_tes +load('../../cis_eqtl_sqtl.rda') +y1<-table(findInterval(d1,seq(-2000,4000,100))) +y1<-y1[match(seq(60),names(y1))] +y1[is.na(y1)]<-0; names(y1)<-seq(60) +y2<-table(findInterval(d2,seq(-2000,4000,100))) +y2<-y2[match(seq(60),names(y2))] +y2[is.na(y2)]<-0; names(y2)<-seq(60) + +p4<-ggplot(data.frame(x=seq(60),eQTLs=as.numeric(y1),sQTLs=as.numeric(y2))) + + geom_line(aes(x=x,y=eQTLs,color='eQTLs'),lwd=0.8) + + geom_line(aes(x=x,y=sQTLs,color='sQTLs'),lwd=0.8) + + theme(legend.position = c(1/6,0.85)) + + labs(x="Distance to affected genes (kb)", + y=bquote(Number~of~italic("cis")*"-QTLs"),color='') + + scale_color_manual(values = c('eQTLs'=npg[1],'sQTLs'=npg[2])) + + geom_vline(xintercept =c(21,40),color=npg[4],lty=2) + + scale_x_continuous(breaks=c(1,11,21,40,50,60), + labels=c(-2,-1, "TSS","TES",1,2)) + + annotate('text',x=30.5,y=0,label='<-gene->',color=npg[4],vjust=-0.5) + + theme(axis.text.x = element_text(color=c(1,1,npg[4],npg[4],1,1))) + + annotate('rect',xmin=21,xmax = 40,ymin=0,ymax = 350,fill=npg[4],alpha=0.1) + + scale_y_continuous(expand = expansion(c(0,0.05))) + + +#### ld_hist +load('../../cis_eqtl_sqtl.rda') +lab<-paste0('italic(r^2)',c('<','>='),0.6,': ', + c(paste0('"',format(sum(cis$ld<0.6),big.mark = ','),'"'), + sum(cis$ld>=0.6))) +p5<-ggplot(cis,aes(ld)) + + labs(x=bquote(italic(r^2)~(ld)),y='Number of genes') + + stat_bin(breaks=seq(0,1,0.01),lwd=0.3, + fill=c(rep(NA,60),rep(npg[5],40)),color=1) + + geom_vline(xintercept = 0.6, col='red',lty=2) + + annotate('text',x=c(0.35,0.82),y=200,label = parse(text=lab),size=4) + + scale_x_continuous(expand = expansion(c(0,0.05)),breaks = seq(0,1,0.2)) + + scale_y_continuous(expand = expansion(c(0,0.05))) + +## co-localization +com<-cis[ld>=0.6] +load('../../maize_genes.rda') +genes<-genes[com$gene] +d1<-d1[names(d1)%in%com$gene] +d2<-d2[names(d2)%in%com$intron] + +y1<-table(findInterval(d1,seq(-2000,4000,100))) +y1<-y1[match(seq(60),names(y1))] +y1[is.na(y1)]<-0; names(y1)<-seq(60) +y2<-table(findInterval(d2,seq(-2000,4000,100))) +y2<-y2[match(seq(60),names(y2))] +y2[is.na(y2)]<-0; names(y2)<-seq(60) + +p6<-ggplot(data.frame(x=seq(60),eQTLs=as.numeric(y1),sQTLs=as.numeric(y2))) + + geom_line(aes(x=x,y=eQTLs,color='eQTLs'),lwd=0.8) + + geom_line(aes(x=x,y=sQTLs,color='sQTLs'),lwd=0.8) + + theme(legend.position = c(1/6,0.85)) + + labs(x="Distance to affected genes (kb)", + y=bquote(Number~of~italic("cis")*"-QTLs"),color='') + + scale_color_manual(values = c('eQTLs'=npg[1],'sQTLs'=npg[2])) + + geom_vline(xintercept =c(21,40),color=npg[4],lty=2) + + scale_x_continuous(breaks=c(1,11,21,40,50,60), + labels=c(-2,-1, "TSS","TES",1,2)) + + annotate('text',x=30.5,y=0,label='<-gene->',color=npg[4],vjust=-0.5) + + theme(axis.text.x = element_text(color=c(1,1,npg[4],npg[4],1,1))) + + annotate('rect',xmin=21,xmax = 40,ymin=0,ymax = 30,fill=npg[4],alpha=0.1) + + scale_y_continuous(expand = expansion(c(0,0.05))) + + +#### CT2 +load('CT2_eqtl_sqtl.rda') +eqtl<-eqtl[eqtl$pvalue<0.01,] +sqtl<-sqtl[sqtl$pvalue<0.01,] +eqtl$pos<-bed$map$position[match(eqtl$snps,bed$map$snp.name)]/1e6 +sqtl$pos<-bed$map$position[match(sqtl$snps,bed$map$snp.name)]/1e6 +snp<-eqtl$snps[which.min(eqtl$pvalue)] + +ct2_zoom1<-ggplot(eqtl,aes(x=pos,y=-log10(pvalue))) + + geom_point(fill='gray',color='gray',pch=21,size=1) + + ylab(bquote(-log[10](italic(P)))) + + annotate('text',16,20,label='eQTL') + + theme(axis.text.x = element_blank(), axis.title.x = element_blank()) + + scale_y_continuous(expand = expansion(c(0,0.05)), limits = c(0,25)) + + geom_point(inherit.aes = FALSE,x=eqtl$pos[which.min(eqtl$pvalue)], + y=max(-log10(eqtl$pvalue)),pch=23,fill='purple',size=2) + + annotate('text',x=eqtl$pos[which.min(eqtl$pvalue)],y=max(-log10(eqtl$pvalue)), + label=snp,size=3.5,hjust=-0.1) + +ct2_zoom2<-ggplot(sqtl,aes(x=pos,y=-log10(pvalue))) + + geom_point(fill='gray',color='gray',pch=21,size=1) + + xlab('chr1 (Mb)') + ylab(bquote(-log[10](italic(P)))) + + annotate('text',16,35,label='sQTL') + + scale_y_continuous(expand = expansion(c(0,0.05)), limits = c(0,40)) + + geom_point(inherit.aes = FALSE,x=sqtl$pos[sqtl$snps==snp], + y=-log10(sqtl$pvalue[sqtl$snps==snp]),pch=23,fill='purple',size=2) + + annotate('text',x=sqtl$pos[sqtl$snps==snp],y=-log10(sqtl$pvalue)[sqtl$snps==snp], + label=snp,size=3.5,hjust=-0.1) + +ct2_zoom<-plot_grid(ct2_zoom1,ct2_zoom2,align = 'v',ncol=1,rel_heights = c(0.8,1)) + +ct2<-locuscompare(eqtl,sqtl,bed=bed, combine = F,region = 1e6,legend_position = "topleft") +ct2_comp<-ct2$locuscompare + + annotate('text',x=-log10(eqtl$pvalue)[eqtl$snps==snp], + y=-log10(sqtl$pvalue)[sqtl$snps==snp], + label=snp,size=3.5,vjust=3,hjust=1) +p7<-plot_grid(ct2_zoom,ct2_comp) + + +#### Cys2 +load('Cys2_eqtl_sqtl.rda') +eqtl<-eqtl[eqtl$pvalue<0.01,] +sqtl<-sqtl[sqtl$pvalue<0.01,] +eqtl$pos<-bed$map$position[match(eqtl$snps,bed$map$snp.name)]/1e6 +sqtl$pos<-bed$map$position[match(sqtl$snps,bed$map$snp.name)]/1e6 +snp<-eqtl$snps[which.min(eqtl$pvalue)] + +cys2_zoom1<-ggplot(eqtl,aes(x=pos,y=-log10(pvalue))) + + geom_point(fill='gray',color='gray',pch=21,size=1) + + ylab(bquote(-log[10](italic(P)))) + + annotate('text',178.25,20,label='eQTL') + + theme(axis.text.x = element_blank(), axis.title.x = element_blank()) + + scale_y_continuous(expand = expansion(c(0,0.05)), limits = c(0,25)) + + scale_x_continuous(limits = c(178,180.02)) + + geom_point(inherit.aes = FALSE,x=eqtl$pos[eqtl$snps=='1-177050943'], + y=-log10(eqtl$pvalue)[eqtl$snps=='1-177050943'],pch=23,fill='purple',size=2) + + annotate('text',x=eqtl$pos[which.max(-log10(eqtl$pvalue))],size=3.5, + y=max(-log10(eqtl$pvalue)),label='1-177050943',hjust=-0.1) + +cys2_zoom2<-ggplot(sqtl,aes(x=pos,y=-log10(pvalue))) + + geom_point(fill='gray',color='gray',pch=21,size=1) + + xlab('chr1 (Mb)') + + ylab(bquote(-log[10](italic(P)))) + + annotate('text',178.25,100,label='sQTL') + + scale_y_continuous(expand = expansion(c(0,0.05)), limits = c(0,120)) + + scale_x_continuous(limits = c(178,180)) + + geom_point(inherit.aes = FALSE,x=sqtl$pos[sqtl$snps=='1-177050943'], + y=-log10(sqtl$pvalue)[sqtl$snps=='1-177050943'],pch=23,fill='purple',size=2) + + annotate('text',x=sqtl$pos[sqtl$snps=='1-177050943'],size=3.5, + y=-log10(sqtl$pvalue)[sqtl$snps=='1-177050943'],label='1-177050943',hjust=-0.1) + +cys2_zoom<-plot_grid(cys2_zoom1,cys2_zoom2,align = 'v',ncol=1,rel_heights = c(0.8,1)) +cys2<-locuscompare(eqtl,sqtl,bed=bed, combine = F,region = 1e6,legend_position = "topleft") +cys2_comp<-cys2$locuscompare + + annotate('text',x=-log10(eqtl$pvalue)[eqtl$snps==snp], + y=-log10(sqtl$pvalue)[sqtl$snps==snp], + label=snp,size=3.5,vjust=0.5,hjust=1.1) +p8<-plot_grid(cys2_zoom,cys2_comp) + +#### arrange plots by row +load("ct2_cys2_plist.rda") +p5 <- p5+theme(axis.title.x = element_text(margin = margin(0,0,-2.5,0))) +pr1 <- plot_grid(p1,p2,p3,labels = 'auto',nrow=1) +pr2 <- plot_grid(p4, p5, p6,labels = c('d','e','f'),nrow=1) + +p7 <- annotate_figure(p7, + top = text_grob("Compact plant2 (ct2)", + face="bold.italic", color='darkblue')) +pr3 <- plot_grid(p7, + plot_grid(plotlist = plist[c(1,3)], nrow=2), + labels = c("g", "h"), nrow=1, + rel_widths = c(2,1)) + +p8 <- annotate_figure(p8, + top = text_grob("Cysteine synthase2 (cys2)", + face="bold.italic", color='darkblue')) +pr4 <- plot_grid(p8, + plot_grid(plotlist = plist[c(2,4)], nrow=2), + labels = c("i", "j"), nrow=1, + rel_widths = c(2,1)) + +#### output +pdf(width = 10,height = 12,file='../Fig2.pdf') +plot_grid(pr1,pr2,pr3,pr4,ncol=1,rel_heights = c(1,1,1.5,1.5)) +dev.off() + + diff --git a/Fig2_code/locuscompare.R b/Fig2_code/locuscompare.R new file mode 100644 index 0000000..ebce59a --- /dev/null +++ b/Fig2_code/locuscompare.R @@ -0,0 +1,174 @@ +#### rewrite locuscompare +locuscompare<-function(in_fn1, in_fn2, bed, snp=NULL, region=500e3, + marker_col1 = "snps", pval_col1 = "pvalue", title1 = "eQTL: ", + marker_col2 = "snps", pval_col2 = "pvalue", title2 = "sQTL: ", + combine = TRUE, legend = TRUE, + legend_position = c("bottomright", "topright", "topleft"), + lz_ylab_linebreak = FALSE ){ + + #### loading libraries + library(snpStats) + library(ggplot2) + library(locuscomparer) + options('ggrepel.max.overlaps'=Inf) + theme_set(theme_classic(12) + + theme( axis.ticks = element_line(colour = 1), + axis.text = element_text(colour = 1), + legend.text = element_text(size=10), + legend.background = element_blank())) + + #### add_label + add_label = function(merged, snp){ + merged$label = ifelse(merged$rsid %in% snp, merged$rsid, '') + return(merged) + } + + #### read_metal + read_metal<-function (in_fn, marker_col = "rsid", pval_col = "pval") { + if (is.character(in_fn)) { + d = read.table(in_fn, header = TRUE) + } + else if (is.data.frame(in_fn)) { + d = in_fn + } + else { + stop("The argument \"in_fn\" must be a string or a data.frame") + } + colnames(d)[which(colnames(d) == marker_col)] = "rsid" + colnames(d)[which(colnames(d) == pval_col)] = "pval" + d$logp = -log10(d$pval) + return(d[, c("rsid", "pval", "logp")]) + } + + #### rewrite make_scatterplot + make_scatterplot<-function (merged, title1, title2, color, shape, size, legend = TRUE, + legend_position = c("bottomright", "topright", "topleft") ) { + require(grid) + p = ggplot(merged, aes(logp1, logp2)) + + geom_point(aes(fill = rsid, size = rsid, shape = rsid), alpha = 0.8) + + geom_point(data = merged[merged$label != "", ], + aes(logp1, logp2, fill = rsid, size = rsid, shape = rsid)) + + xlab(bquote(.(title1)*-log[10](italic(P)))) + + ylab(bquote(.(title2)*-log[10](italic(P)))) + + scale_fill_manual(values = color, guide = "none") + + scale_shape_manual(values = shape, guide = "none") + + scale_size_manual(values = size, guide = "none") + #ggrepel::geom_text_repel(aes(label = label),size=3.5) + if (legend == TRUE) { + legend_position = match.arg(legend_position) + if (legend_position == "bottomright") { + legend_box = data.frame(x = 0.8, y = seq(0.4, 0.2, -0.05)) + }else if (legend_position == "topright") { + legend_box = data.frame(x = 0.8, y = seq(0.8, 0.6, -0.05)) + }else { + legend_box = data.frame(x = 0.1, y = seq(0.8, 0.6, -0.05)) + } + p = p + + annotation_custom(rectGrob(legend_box$x,legend_box$y,0.05,0.05, + just = c(0,0), + gp=gpar(fill = rev(c("blue4", "skyblue", "darkgreen", "orange", "red"))))) + + annotation_custom(textGrob(seq(0.8,0.2,-0.2), x = legend_box$x[-1] + 0.05, + y = legend_box$y[-1]+0.05,just = c(-0.3,0.5),gp=gpar(fontsize=10,lineheight=0.9))) + + annotation_custom(textGrob(parse(text = "italic(r^2)"), x = legend_box$x[1] + 0.05, + y = legend_box$y[1]+0.05,just = c(0.5,-1),gp=gpar(fontsize=10,lineheight=0.9))) + } + return(p + theme(plot.margin = theme_get()$plot.margin-margin(0,0,4.5,0))) + } + + #### make_locuszoom + make_locuszoom<-function (metal, title, chr, color, shape, size, ylab_linebreak = FALSE) { + p = ggplot(metal, aes(x = pos, logp)) + + geom_point(aes(fill = rsid, size = rsid, shape = rsid), alpha = 0.8) + + geom_point(data = metal[metal$label != "", ], aes(x = pos, logp, fill = rsid, size = rsid, shape = rsid)) + + scale_fill_manual(values = color, guide = "none") + + scale_shape_manual(values = shape, guide = "none") + + scale_size_manual(values = size, guide = "none") + + ggrepel::geom_text_repel(aes(label = label),size=3.5) + + xlab(paste0("chr", chr, " (Mb)")) + + ylab(bquote(.(title)*-log[10](italic(P)))) + if (ylab_linebreak == TRUE) { + p = p + ylab(bquote(atop(.(title), -log[10](italic(P))))) + } + return(p) + } + + #### make_combined_plot + make_combined_plot <-function (merged, title1, title2, ld, chr, snp = NULL, + combine = TRUE, legend = TRUE, + legend_position = c("bottomright", "topright", "topleft"), + lz_ylab_linebreak = FALSE) { + snp = get_lead_snp(merged, snp) + color = assign_color(merged$rsid, snp, ld) + shape = ifelse(merged$rsid == snp, 23, 21) + names(shape) = merged$rsid + size = ifelse(merged$rsid == snp, 3, 2) + names(size) = merged$rsid + merged = add_label(merged, snp) + p1 = make_scatterplot(merged, title1, title2, color, shape, + size, legend, legend_position) + metal1 = merged[, c("rsid", "logp1", "chr", "pos", "label")] + colnames(metal1)[which(colnames(metal1) == "logp1")] = "logp" + p2 = make_locuszoom(metal1, title1, chr, color, shape, size, + lz_ylab_linebreak) + metal2 = merged[, c("rsid", "logp2", "chr", "pos", "label")] + colnames(metal2)[which(colnames(metal2) == "logp2")] = "logp" + p3 = make_locuszoom(metal2, title2, chr, color, shape, size, + lz_ylab_linebreak) + if (combine) { + p2 = p2 + theme(axis.text.x = element_blank(), axis.title.x = element_blank()) + p4 = cowplot::plot_grid(p2, p3, align = "v", nrow = 2, + rel_heights = c(0.8, 1)) + p5 = cowplot::plot_grid(p1, p4) + return(p5) + } + else { + return(list(locuscompare = p1, locuszoom1 = p2, locuszoom2 = p3)) + } + } + + #### run + cat('---read input files:\t') + d1 = read_metal(in_fn1, marker_col1, pval_col1) + d2 = read_metal(in_fn2, marker_col2, pval_col2) + merged = merge(d1, d2, by = "rsid", suffixes = c("1", "2"), all = FALSE) + cat('OK!\n---read plink files:\t') + if (is.character(bed)){ + plink<-read.plink(bed=bed, select.snps=merged$rsid) + }else if (is.list(bed) & all(names(bed) == c('genotypes','fam','map'))){ + plink<-bed + }else{ + stop("The argument \"bed\" must be a string or a list of result from read.plink") + } + cat('OK!\n---make locuscompare:\t') + merged = data.frame(merged, + plink$map[match(merged$rsid,plink$map$snp.name),c("chromosome","position")]) + colnames(merged)[colnames(merged) == "chromosome"] = "chr" + colnames(merged)[colnames(merged) == "position"] = "pos" + merged$pos<-merged$pos/1e6 + snp<-get_lead_snp(merged,snp=snp) + snp.index<-which(merged$rsid==snp) + merged<-subset(merged,chr==merged$chr[snp.index] & + pos>=merged$pos[snp.index]-region/2 & + pos<=merged$pos[snp.index]+region/2) + chr<-unique(merged$chr) + R2 <- ld(plink$genotypes[,merged$rsid],plink$genotypes[,snp],stats="R.squared") + ld<-data.frame(SNP_A=colnames(R2),SNP_B=rownames(R2),R2=R2[,1]) + cat('OK!\n---make combined plot:\t') + p = make_combined_plot(merged, title1, title2, ld, chr, snp, combine=FALSE, + legend, legend_position, lz_ylab_linebreak) + p1<-p$locuscompare + p2<-p$locuszoom1 + p3<-p$locuszoom2 + if (combine) { + p2 = p2 + theme(axis.text.x = element_blank(), axis.title.x = element_blank()) + p4 = cowplot::plot_grid(p2, p3, align = "v", nrow = 2, + rel_heights = c(0.8, 1)) + p5 = cowplot::plot_grid(p1, p4) + cat('done!\n') + return(p5) + } + else { + cat('done!\n') + return(list(locuscompare = p1, locuszoom1 = p2, locuszoom2 = p3)) + } +} \ No newline at end of file diff --git a/Fig3_code/Fig3.R b/Fig3_code/Fig3.R new file mode 100644 index 0000000..612fc3c --- /dev/null +++ b/Fig3_code/Fig3.R @@ -0,0 +1,133 @@ +####### +setwd('/work3/maize_eGWAS/figures/Fig3_code/') +suppressPackageStartupMessages({ + library(ggplotify) + library(ggplot2) + library(cowplot) + library(GenomicRanges) + library(data.table) +}) +source('manh.R') + +theme_set(theme_bw(12) + + theme( axis.ticks = element_line(colour = 1), + axis.text = element_text(colour = 1,size=rel(0.9)), + legend.title = element_text(size=rel(0.9)), + legend.text = element_text(size=rel(0.9)), + plot.title = element_text(hjust = 0.5,size = 12))) + +#### IC_scatter +mod<-read.csv('moduleHb_Size.csv') +test<-cor.test(log2(mod$ModelSize),mod$Heritability) +lab <- mod[mod$ModelName %in% c(39, 79, 101), ] +lab$Func <- c("(Oxidation reduction;\nResponse to water deprivation)", + "(Translation)", + "(Protein biogenesis/protein folding;\n Plant-pathogen interaction)") +p1<-ggplot(mod,aes(x=log2(ModelSize),y=Heritability)) + geom_point() + + geom_smooth(method = 'lm') + + labs(x=bquote(log[2]*'(Model size)'), + y=bquote(Model~mean~H^2), + title = substitute(italic(R)==x*','~italic(P)==y, + list(x=round(test$estimate,2),y=signif(test$p.value,3)))) + + annotate('text',x=log2(lab$ModelSize),y=lab$Heritability, + label=paste0('IC',lab$ModelName, " ", lab$Func), + vjust=c(0.2,-1, 0.2), hjust=c(-0.05,0.2,-0.05), color=2, size=3) + + annotate('point',x=log2(lab$ModelSize),y=lab$Heritability, + shape=23, fill="blue", color="darkred", size=3) +pdf(width = 6, height = 5, file='IC_scatter.pdf') +print(p1) +dev.off() + +### IC39 +load('../../sweep/sel_genes_042121.rda') +load('../../maize_genes.rda') +load('../../eGWAS340/eqtl_peak.rda') + +f1 <- fread("../../gemma/model_res/mod39.assoc.txt") +cut <- 0.05/nrow(f1) +df1$pval <- df1$pval * -log10(cut)/cut_xpclr +df2$pval <- df2$pval * -log10(cut)/cut_fst +x1 <- sort(genes[c('Zm00001d047757','Zm00001d047755','Zm00001d047758')], + ignore.strand=T) + +colnames(f1)[colnames(f1) == 'p_lrt'] <- 'pval' +f1 <- subset(f1, pval < 0.001 ) + +p2 <- as_grob(function() { + par(layout(matrix(rep(1:2, c(3,4)))), mar=c(2, 4.5, 0.5, 0.5), cex=1) + manh(f1, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1, cex=0.5) + par(mar=c(4.5, 4.5, 2.5, 0.5)) + manh(f1, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1, chr=9, + xlim=c(140.8,141.2), cex=0.5) + points(x=141083854/1e6, y=-log10(f1$pval[f1$ps==141083854]),pch=23, + col=1, bg=2, cex=1) + #text(x=141083854/1e6, y=-log10(f1$pval[f1$ps==141083854]), label=141083854, + # adj = c(-0.1, 0.5), col=2) + rect(start(x1)/1e6, 43, end(x1)/1e6, 44, col=2, xpd=T) + text(x=start(x1[1])/1e6, y=44, xpd=T, labels=x1$symbol[1], font=3, + adj=c(0.5, -0.5)) + text(x=start(x1[2])/1e6, y=44, xpd=T, labels=x1$symbol[2], font=3, + adj=c(1, -0.5)) + text(x=end(x1[3])/1e6, y=44, xpd=T, labels=x1$symbol[3], font=3, + adj=c(0, -0.5)) + #manh(df1, col=2, chr=9, type='l', log=F, lwd=2, xlim=c(140.6,141.4), add=T) + #abline(v=141083854/1e6) + #manh(df2, col=4, chr=9, type='l', log=F, lwd=2, xlim=c(140.6,141.4), add=T) +}) +pdf (width = 6, height = 5, file='IC39.pdf') +plot_grid(p2) +dev.off() + +#### IC79 +f2 <- fread("../../gemma/model_res/mod79.assoc.txt") +colnames(f2)[colnames(f2) == 'p_lrt'] <- 'pval' +f2 <- subset(f2, pval < 0.001 ) +x2 <- genes['Zm00001d014664'] + +p3 <- as_grob(function() { + par(layout(matrix(rep(1:2, c(3,4)))), mar=c(2, 4.5, 0.5, 0.5), cex=1) + manh(f2, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1, cex=0.5) + par(mar=c(4.5, 4.5, 2.5, 0.5)) + manh(f2, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1, chr=5, xlim=c(57.5,58.5), cex=0.5) + lead <- f2[which.min(f2$pval)] + points(x=lead$ps/1e6, y=-log10(lead$pval), pch=23, cex=1, bg=2) + rect(start(x2)/1e6, 13.5, end(x2)/1e6, 14, col=2, xpd=T) + text(x=start(x2)/1e6, y=14, labels = 'cia2', font=3, adj=c(0.5, -0.5),xpd=T) + #manh(df1, col=2, chr=1, type='l', log=F, lwd=2, xlim=c(57,59), add=T) + #manh(df2, col=4, chr=1, type='l', log=F, lwd=2, xlim=c(57,59), add=T) +}) +pdf (width = 6, height = 5, file='IC79.pdf') +plot_grid(p3) +dev.off() + +#### IC101 +f3 <- fread("../../gemma/model_res/mod101.assoc.txt") +colnames(f3)[colnames(f3) == 'p_lrt'] <- 'pval' +f3 <- subset(f3, pval < 0.001 ) +x3 <- genes['Zm00001d042533'] + +p4 <- as_grob(function() { + par(layout(matrix(rep(1:2, c(3,4)))), mar=c(2, 4.5, 0.5, 0.5), cex=1) + manh(f3, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1) + par(mar=c(4.5, 4.5, 2.5, 0.5)) + manh(f3, col=ggsci::pal_igv()(10), cut=cut,cut.col = 1, chr=3, xlim=c(170.6,171.3)) + lead <- f3[which.min(f3$pval)] + points(x=lead$ps/1e6, y=-log10(lead$pval), pch=23, cex=1, bg=2) + rect(start(x3)/1e6, 16, end(x3)/1e6, 16.5, col=2, xpd=T) + text(x=start(x3)/1e6, y=16.5, labels = x3$symbol, font=3, adj=c(0.5, -0.5),xpd=T) + #manh(df1, col=2, log=F, type='l', cut=cut,cut.col = 1, chr=3, xlim=c(170.5,171.5),add=T, lwd=2) + #manh(df2, col=4, log=F, type='l', cut=cut,cut.col = 1, chr=3, xlim=c(170.5,171.5),add=T, lwd=2) + yaxis2 <- c(0, 15) * cut_xpclr/(-log10(cut)) + #axis(4, at=pretty(yaxis2) * -log10(cut)/cut_xpclr, labels = pretty(yaxis2), las=1) + #mtext('XPCLR', 4, line=2, cex=1) +}) + +pdf (width = 6, height = 5, file='IC101.pdf') +plot_grid(p4) +dev.off() + + +#### combine +pdf(width = 12,height = 10,file='../Fig3.pdf') +plot_grid(p1,p2,p3,p4, labels = 'auto') +dev.off() diff --git a/Fig3_code/ICA_Analysis.R b/Fig3_code/ICA_Analysis.R new file mode 100644 index 0000000..4726e52 --- /dev/null +++ b/Fig3_code/ICA_Analysis.R @@ -0,0 +1,119 @@ + +setwd("/work3/maize_eGWAS/figures/Fig3_code/") +suppressPackageStartupMessages({ + library(fastICA) + library(moments) + library(matrixStats) +}) + +rawData<-read.csv("../../eGWAS340/eGWAS340_repUnmerged_Untransformed_fpkm.csv", + row.names = 1, check.names = F) +dfHb <- read.csv('../Fig1_code/e340_Heritability.csv', row.names = 1) + +Genekept <- dfHb$geneID[dfHb$Hb>=0.05] +geHb = rawData[Genekept, ] #subset genes with heritability higher than 0.05 +ge1 <- subset(geHb, subset=matrixStats::rowSds(as.matrix(geHb))>1) #subset genes which expression sd higher than 1 +dim(ge1) + +# 16972 572 + +X <- t(scale(t(ge1))) + +####### find optimal number of components ############# +ev <- svd(X)$d^2 #perform singular value Decomposition with the raw dataset +Var <- ev/sum(ev) * 100 #calculate the variance explained by each of the components +head(Var) +#10.775900 6.789168 6.200510 3.487104 2.955675 2.463114 + +#### check how many ICs to calcualte that explaines 80 percent of the expression variation +n.comp1<-which(cumsum(ev/sum(ev))>0.8)[1] +n.comp1 +#166 + +dfsvd <- data.frame('svd' = ev, 'varExplained' = Var, 'sumVarExplained' =cumsum(ev/sum(ev)) ) +write.csv(dfsvd, file = 'svd.csv') + +################ make screeplot ################## +pdf(file='screeplot.pdf',10,6) +#x11('',10,6) +par(mar=c(3,5,1,5)) +plot(NA,xlim=c(1,200),ylim=log10(c(0.01,100)),xaxs='i',yaxs='i',xlab=NA,ylab=NA,yaxt='n') +axis(1,1) +axis(2,seq(-2,2,1),sprintf("%.2f%%",10^seq(-2,2,1)),las=2) +axis(4,seq(-2,2,0.4),paste0(seq(0,100,10),'%'),las=2) +text(-19,0,labels = 'Variance explained by each component',xpd=T,cex=1.2,font = 2,srt=90) +text(218,0,labels = 'Cumulative explained variance',xpd=T,cex=1.2,font = 2,srt=-90) +polygon(x=c(1:200,200,1),y=c(cumsum(Var)[1:200]/100*4-2,-2,-2),col=rgb(0.95,1,0.95),border = NA) +points(cumsum(Var)/100*4-2,col='green',lwd=2,type='l') +points(log10(Var),col='blue',lwd=2,type='l') +abline(v = 166, col="red", lwd=3, lty=2) +text(166,-1.9,labels = '166',cex=1.2,font = 2) +grid(0,5) +dev.off() + +################################## +####### Perform ICA ############# +set.seed(2021) + +ica1 <- fastICA(X, n.comp1) + +S = ica1$S +A = ica1$A + +colnames(S) <- rownames(A) <- seq_len(n.comp1) +colnames(A) <- colnames(rawData) + +write.csv(A, file = 'ICA_matrixA.csv', quote = F) + +### Calculate kurtosis for each IC #################### +kurt_S <- apply(S, 2, kurtosis) # 1 means row wise, 2 means column wise +sum(kurt_S >= 6) #97 (only the ICs with kurtosis higher than 6 are kept for downstream) + +################## Calculate fdr for each gene of each IC to identify the genes in the same cluster ###### +fdr1<-apply(S, 2, function(x) { ## column of S is cluster, assign fdr column wise + fdrtool::fdrtool(x, statistic = 'normal', plot=F, verbose = F)$qval +}) + +mod1<-t(apply(fdr1, 1, function(x) x < 0.01)) ## Get a binary matrix indicate whether a gene should be assigned to a cluster (column) +# colnames(mod1)<-1:n.comp1 ### each column is a IC ID (n.comp1 = 166) +colSums(mod1)[kurt_S>=6] ### only keep the IC clusters with kurtosis higher than 6 and with more than 10 genes + +sum(apply(mod1, 1, any)) + +mod1_genes<-apply(mod1, 2, function(x) names(x[x])) +# names(mod1_genes)<-1:n.comp1 + +cluster1<-t(apply(A,1,function(x) table(kmeans(x,range(x))$cluster))) # using kmean to get two clusters and count number of individuals within each cluster +#cluster1<-t(apply(dfA,1,function(x) table(kmeans(x,range(x))$cluster))) # using kmean to get two clusters and count number of individuals within each cluster + +sel <- kurt_S>=6 & rowAlls(cluster1>=10) +sum(sel) +A_sel <- subset(A, sel) +heatmap(A_sel,scale ='none') + +write.csv(A_sel, 'ICA_matrixA_sel.csv') + +srg<-mod1_genes[sel] # take clusters that have kurtosis higher than 6 and kmean clusters with both having more than 10 genotypes +sgp<-ica1$A[sel, ] +rownames(sgp)<- seq(n.comp1)[sel] +colnames(sgp)<-colnames(X) +save(ica1,srg,sgp,file='eGWAS340_ICA.rda') + +write.csv(S[, sel], file='ICA_KeptMod.csv', quote=F) + +########## plot kurtosis examples +library(ggplot2) +library(ggbreak) +library(patchwork) + +dfexample = data.frame(geneID = row.names(S), IC1 = S[,1], IC38 = S[,38]) +head(dfexample) +g1 <- ggplot(dfexample, aes(x= IC1)) + geom_histogram(color='black', fill = 'white',bins=50) # hist(S[,36], ylim = c(0,200)) +g1 <- g1 + scale_y_break(c(50,200), scales = 0.5) +g1 <- g1 + labs(title = 'Kurtosis = 4.36') + +g2 <- ggplot(dfexample, aes(x= IC38)) + geom_histogram(color='black', fill = 'white',bins=50) # hist(S[,36], ylim = c(0,200)) +g2 <- g2 + scale_y_break(c(50,200), scales = 0.5) +g2 <- g2 + labs(title = 'Kurtosis = 11.8') + +g1 + g2 diff --git a/Fig3_code/ICA_HB_blup.R b/Fig3_code/ICA_HB_blup.R new file mode 100644 index 0000000..c3392ce --- /dev/null +++ b/Fig3_code/ICA_HB_blup.R @@ -0,0 +1,62 @@ + +setwd('/work3/maize_eGWAS/figures/Fig3_code/') +library(lme4) +library(parallel) + +################# calculate Heritability for each of the kept moduals ########### +dfA = read.csv('ICA_matrixA_sel.csv', check.names = F, row.names = 1) +cn = colnames(dfA) +idx = colnames(dfA) %in% colnames(dfA)[duplicated(colnames(dfA))] +dfA <- dfA[, idx] +colnames(dfA) = cn[idx] +length(unique(colnames(dfA))) # 219 + +group <- factor(colnames(dfA)) +all(rownames(dfA) == names(srg) ) #TRUE + +HbICA <- parallel::mclapply(seq_len(nrow(dfA)), function(i){ + df1 <- dfA[i,] + df1 <- data.frame(t(df1)) + df1$gt <- group + colnames(df1)[1] = 'gexp' + model = lmer(gexp ~ (1|gt) , df1) + summary(model) + varComp <- as.data.frame(VarCorr(model,comp='vcov')) + sigmas <- varComp$vcov + H2 <- sigmas[1]/sum(sigmas) + H2 +},mc.cores=8) + +dfHb = data.frame(ModelName = names(srg), ModelSize = sapply(srg, length), + Heritability = unlist(HbICA)) +write.csv(dfHb, file = 'moduleHb_Size.csv', row.names = F, quote = F) +scatter.smooth(log2(dfHb$ModelSize), dfHb$Heritability) + +#################### Calculate BLUPs for each modual for GWAS ################ +dfA = read.csv('ICA_matrixA_sel.csv', check.names = F, row.names = 1) +group <- factor(colnames(dfA)) + +blupOutPut <- lapply(seq_len(nrow(dfA)), function(i){ + df1 <- data.frame(t(dfA[i,])) + df1$gt <- group + colnames(df1)[1] = 'signal' + model = lmer(signal ~ (1|gt) , df1) + varComp <- as.data.frame(VarCorr(model,comp='vcov')) + blup <- coef(model)$gt + blup + #hist(blup[,1]) +}) + +blupOutPut <- do.call(cbind,blupOutPut) +dim(blupOutPut) +colnames(blupOutPut) <- rownames(dfA) #names used for list, for a matrix, use colnames. +blupOutPut[1:5, 1:5] + +write.csv(blupOutPut, file = 'Model_Blups.csv') + +for (i in 1:nrow(blupOutPut)){ + blups <- blupOutPut[i,] + hist(blups) +} + +data.frame(unique(group), levels(group)) diff --git a/Fig3_code/manh.R b/Fig3_code/manh.R new file mode 100644 index 0000000..32dce7b --- /dev/null +++ b/Fig3_code/manh.R @@ -0,0 +1,93 @@ +chr_sizes.maize<-c(307041717, + 244442276, + 235667834, + 246994605, + 223902240, + 174033170, + 182381542, + 181122637, + 159769782, + 150982314) +names(chr_sizes.maize)<-seq(10) +chr_sizes.rice<-c(43270923, + 35937250, + 36413819, + 35502694, + 29958434, + 31248787, + 29697621, + 28443022, + 23012720, + 23207287, + 29021106, + 27531856) +names(chr_sizes.rice)<-seq(12) +# col<-RColorBrewer::brewer.pal(5,'Dark2') +# col<-RColorBrewer::brewer.pal(5,'Paired') +manh<-function(x,chr=NULL,xlim=NULL,ylim=NULL,cut=1e-5,chr_sizes=chr_sizes.maize, + col=c('darkblue','yellowgreen'),pch=20,cex=0.8,chr.split=TRUE, + x.axis=T,ann=NULL,ann.col=1,ann.cex=0.8,ann.font=3,cut.col=2, + log=TRUE,add=FALSE,ylab=expression(-log[10](italic(P))),...) { + x<-as.data.frame(x) + chr_sizes<-chr_sizes+chr_sizes/20 + chrom<-grep('^chr',colnames(x),ignore.case =T) + pos<-grep('^bp|^pos|^ps',colnames(x),ignore.case =T) + pval<-grep('^p$|^pval',colnames(x),ignore.case =T) + if (length(chrom)!=1) { + stop('there is no unique chromosome column in "x"') + } + if (length(pos)!=1) { + stop('there is no unique position column in "x"') + } + if (length(pval)!=1) { + stop('there is no unique pvalue column in "x"') + } + x<-x[order(x[,chrom],x[,pos]),] + chrom<-x[,chrom] + pos<-x[,pos] + pval<-x[,pval] + if (log) { + pval<- -log10(pval) + cut<- -log10(cut) + } + sizes<-c(0,cumsum(chr_sizes[-length(chr_sizes)])) + names(sizes)<-names(chr_sizes) + pos<-pos+sizes[match(chrom,names(sizes))] + color<-rep_len(col,length(chr_sizes)) + names(color)<-names(chr_sizes) + color<-color[match(chrom,names(sizes))] + if (is.null(chr)) { + if (add) { + points(x=pos,y=pval,pch=pch,cex=cex,col=color,...) + }else{ + plot(x=pos,y=pval,pch=pch,cex=cex,col=color,xlab='Chromosome', + xlim=xlim,ylim=ylim,xaxt='n',las=1,ylab=ylab, ...) + if (x.axis) axis(1,cumsum(chr_sizes)-chr_sizes/2,names(chr_sizes)) + abline(h=cut,col=cut.col,lwd=1,lty=2) + if (chr.split) { + abline(v=(cumsum(chr_sizes)-chr_sizes/40)[-length(chr_sizes)], + col='gray') + } + } + if (!missing(ann)) { + h<-max(ylim) + x0<-ann[,2]+sizes[match(ann[,1],names(sizes))] + y0<-ann[,3] + y1<-rep(seq(h,h*0.4,-h*0.1),length=length(y0)) + y1[y1start(gr)-1e6 & df$ps%""),adj = c(1.1,0.5),xpd=T,col=2) + +par(mar=c(2,3,1,0.5)+0.5) +p<-t.test(pheno$Endosperm_Color[pheno$subpop=='tropical'], + pheno$Endosperm_Color[pheno$subpop=='temperate'])$p.value +p <- signif(p,3) +boxplot(pheno$Endosperm_Color[pheno$subpop=='tropical'], + pheno$Endosperm_Color[pheno$subpop=='temperate'],outline=T,ylim=c(0.5,5.5), + col=npg_pal[3],ylab='',las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,main=bquote(italic(P)==.(p))) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext('Endosperm color',2,line=2.5) +p<-t.test(pheno$Peiffer_2014_GDD_DTA[pheno$subpop=='tropical'], + pheno$Peiffer_2014_GDD_DTA[pheno$subpop=='temperate'])$p.value +p<-signif(p, 3) +boxplot(pheno$Peiffer_2014_GDD_DTA[pheno$subpop=='tropical']/24, + pheno$Peiffer_2014_GDD_DTA[pheno$subpop=='temperate']/24,outline=T, + ylim=c(50,110),medlwd=1, + col=npg_pal[3],ylab=NULL,las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6, + xlab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext('Flowering time (d)',2,line=2.5) +p<-t.test(pheno$Upper_Kernel_Shape[pheno$subpop=='tropical'], + pheno$Upper_Kernel_Shape[pheno$subpop=='temperate'])$p.value +p <- signif(p,3) +boxplot(pheno$Upper_Kernel_Shape[pheno$subpop=='tropical'], + pheno$Upper_Kernel_Shape[pheno$subpop=='temperate'],outline=T,ylim=c(0.5,6.5), + col=npg_pal[3],ylab='',las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext('Upper kernel shape',2,line=2.5) +dev.off() + + +#### y1 +pdf(width = 12,height = 3,file = 'y1_sel.pdf') +gr<-genes[genes$gene_id=='Zm00001d036345'] +af<-snps[snps$snp.nam==cis1$snps[cis1$gene=='Zm00001d036345'],] +layout(matrix(rep(1:4,c(2,1,1,1)),nrow=1)) +par(mar=c(2.5,4,1.5,4),yaxs='i',cex=0.95,cex.axis=0.95,cex.main=0.95,bty='l') +manh(df1,log=F,cut=NULL,bty='u',ylab=NA,col='gray',chr=6,xlim=c(84.8,85.2), + type='n',ylim=c(0,40)) +rect(start(gr)/1e6,0,end(gr)/1e6,40,col=5,border=5) +manh(df1,log=F,add=T,type='l',chr=6,col='gray') +manh(df1,log=F,add=T,type='p',chr=6,col='gray') +points(df2$pos[df2$chr==6]/1e6,df2$pval[df2$chr==6]*100,col='gray',type='l') +points(df2$pos[df2$chr==6]/1e6,df2$pval[df2$chr==6]*100,col='gray',type='p',cex=0.7,pch=4) +axis(4,at=seq(0,120,10),seq(0,120,10)/100,las=1) +mtext('XP-CLR',2,line=2.5) +mtext('Fst',4,line=2.5) +legend(85,40,legend=c('XP-CLR ','Fst'),pch=c(20,4),lty=1, + col='gray',bty='n',ncol=2,xpd=T,xjust = 0.5, yjust = 0) +mtext('chr6',1,at=84.75,line=0.5) +mtext('Mb',1,at=85.24,line=0.5) +pe<-signif(cis1$pvalue[cis1$gene=='Zm00001d036345'],3) +points(af$pos/1e6,1,pch=25,bg=2,col=2,xpd=T) +legend(84.8,40,legend=substitute(eQTL~(italic(P)==x),list(x=pe)),pch=25, + col=2,bty='n',pt.bg=2, text.col=2) + +par(mar=c(2,3,1,0.5)+0.5) +bp<-barplot(as.numeric(hb[hb$gene_id=='Zm00001d036345',c('HbTrop','HbTemp')]), + col=npg_pal[3],las=1,ylim=c(0,1),width = 0.6, space = 4/6, + xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(H^2~of~italic('y1')~expression),2,line=2.5) +axis(1,pos=0,c(0,3)) +p<-t.test(pheno$Zm00001d036345[pheno$subpop=='tropical'], + pheno$Zm00001d036345[pheno$subpop=='temperate'])$p.value +p<-signif(p,3) +boxplot(pheno$Zm00001d036345[pheno$subpop=='tropical'], + pheno$Zm00001d036345[pheno$subpop=='temperate'],outline=T,ylim=c(0,8), + col=npg_pal[3],las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,ylab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(italic('y1')~root~expression),2,line=2.2) +x<-rbind(1-af[,c('RAF.trop', 'RAF.temp')], + af[,c('RAF.trop', 'RAF.temp')]) +rownames(x)<-c(af[,'a2'],af[,'a1']) +bp<-barplot( as.matrix(x),col=npg_pal[2:1],xaxt='s',legend.text =T, + args.legend = list(x=1.3,y=1,xpd=T,bty='n',ncol=2,xjust=0.5,yjust=0), + xaxt='n',las=1,width = 0.6, space = 4/6, xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9,pos=0) +mtext(substitute(eQTL~(chr*x:y),list(x=af$chr,y=format(af$pos,big.mark=","))),2,line=2.5) +axis(1,pos=0,c(0,3)) +dev.off() + + +#### sweet2 +pdf(width = 12,height = 3,file = 'sweet2_sel.pdf') +gr<-genes[genes$gene_id=='Zm00001d009365'] +af<-snps[snps$snp.nam==cis1$snps[cis1$gene=='Zm00001d009365'],] +layout(matrix(rep(1:4,c(2,1,1,1)),nrow=1)) +par(mar=c(2.5,4,1.5,4),yaxs='i',cex=0.95,cex.axis=0.95,cex.main=0.95,bty='l') +manh(df1,log=F,cut=NULL,bty='u',ylab=NA,col="gray", + chr=8,xlim=c(59.2,60.2),type='n',ylim=c(0,15)) +rect(start(gr)/1e6,0,end(gr)/1e6,15,col=5,border = 5) +manh(df1,log=F,add=T,type='l',chr=8,col="gray") +manh(df1,log=F,add=T,type='p',chr=8,col="gray") +points(df2$pos[df2$chr==8]/1e6,df2$pval[df2$chr==8]*30,col="gray",type='l') +points(df2$pos[df2$chr==8]/1e6,df2$pval[df2$chr==8]*30,col="gray",type='p',cex=0.7, pch=4) +axis(4,at=seq(0,15,3),seq(0,15,3)/30,las=1) +mtext('XP-CLR',2,line=2.5) +mtext('Fst',4,line=2.5) +legend(59.7,15,legend=c('XP-CLR ','Fst'),pch=c(20,4),lty=1, + col='gray',bty='n',ncol=2,xpd=T,xjust = 0.5, yjust = 0) +mtext('chr8',1,at=59.05,line=0.5) +mtext('Mb',1,at=60.3,line=0.5) +pe<-signif(cis1$pvalue[cis1$gene=='Zm00001d009365'],3) +ps<-signif(cis$pvalue.sqtl[cis$gene=='Zm00001d009365'],3) +points(af$pos/1e6,0.5,pch=25,bg=2,col=2,xpd=T) +points(cis$pos.sqtl[cis$gene=='Zm00001d009365']/1e6,0.5,pch=25,bg=1,col=1) +legend(59.2,15,legend = c(as.expression(substitute(eQTL~(italic(P)==x),list(x=pe))), + as.expression(substitute(sQTL~(italic(P)==x),list(x=ps)))), + pch=25,col=c(2,1),bty='n',pt.bg=c(2,1), text.col=c(2,1),y.intersp=1.2) + +par(mar=c(2,3,1,0.5)+0.5) +bp<-barplot(as.numeric(hb[hb$gene_id=='Zm00001d009365',c('HbTrop','HbTemp')]), + col=npg_pal[3],las=1,ylim=c(0,1),width = 0.6, space = 4/6, xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(H^2~of~italic('sweet2')~expression),2,line=2.5) +axis(1,pos=0,c(0,3)) +p<-t.test(pheno$Zm00001d009365[pheno$subpop=='tropical'], + pheno$Zm00001d009365[pheno$subpop=='temperate'])$p.value +p<-signif(p, 3) +boxplot(pheno$Zm00001d009365[pheno$subpop=='tropical'], + pheno$Zm00001d009365[pheno$subpop=='temperate'],outline=T,ylim=c(0,30), + col=npg_pal[3],las=1, names=NULL,yaxs='i',xaxt='n', boxwex=0.6,medlwd=1, + xlab=NULL,ylab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(italic('sweet2')~root~expression),2,line=2.2) +x<-rbind(1-af[,c('RAF.trop', 'RAF.temp')], + af[,c('RAF.trop', 'RAF.temp')]) +rownames(x)<-c(af[,'a2'],af[,'a1']) +bp<-barplot( as.matrix(x),col=npg_pal[2:1],xaxt='s',legend.text =T, + args.legend = list(x=1.3,y=1,xpd=T,bty='n',ncol=2,xjust=0.5,yjust=0), + xaxt='n',las=1,width = 0.6, space = 4/6, xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9,pos=0) +mtext(substitute(eQTL~(chr*x:y),list(x=af$chr,y=format(af$pos,big.mark=","))),2,line=2.5) +axis(1,pos=0,c(0,3)) +dev.off() + + +#### mcm4 +pdf(width = 12,height = 3,file = 'mcm4_sel.pdf') +gr<-genes[genes$gene_id=='Zm00001d009374'] +af<-snps[snps$snp.nam==cis1$snps[cis1$gene=='Zm00001d009374'],] +layout(matrix(rep(1:4,c(2,1,1,1)),nrow=1)) +par(mar=c(2.5,4,1.5,4),yaxs='i',cex=0.95,cex.axis=0.95,cex.main=0.95,bty='l') +manh(df1,log=F,cut=NULL,bty='u',ylab=NA,col="gray", + chr=8,xlim=c(60.6,61),type='n',ylim=c(0,20)) +rect(start(gr)/1e6,0,end(gr)/1e6,40,col=5,border =5) +manh(df1,log=F,add=T,type='l',chr=8,col="gray") +manh(df1,log=F,add=T,type='p',chr=8,col="gray") +points(df2$pos[df2$chr==8]/1e6,df2$pval[df2$chr==8]*40,col="gray",type='l') +points(df2$pos[df2$chr==8]/1e6,df2$pval[df2$chr==8]*40,col="gray",type='p',cex=0.7, pch=4) +axis(4,at=seq(0,20,4),seq(0,20,4)/40,las=1) +mtext('XP-CLR',2,line=2.5) +mtext('Fst',4,line=2.5) +legend(60.8,20,legend=c('XP-CLR ','Fst'),pch=c(20,4),lty=1, + col='gray',bty='n',ncol=2,xpd=T,xjust = 0.5, yjust = 0) +mtext('chr8',1,at=60.55,line=0.5) +mtext('Mb',1,at=61.04,line=0.5) +pe<-signif(cis1$pvalue[cis1$gene=='Zm00001d009374'],3) +points(af$pos/1e6,0.5,pch=25,bg=2,col=2) +legend(60.6,20,legend=substitute(eQTL~(italic(P)==x),list(x=pe)),pch=25, + col=2,bty='n',pt.bg=2, text.col=2) + +par(mar=c(2,3,1,0.5)+0.5) +bp<-barplot(as.numeric(hb[hb$gene_id=='Zm00001d009374',c('HbTrop','HbTemp')]), + col=npg_pal[3],las=1,ylim=c(0,1),width = 0.6, space = 4/6, xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(H^2~of~italic('mcm4')~expression),2,line=2.5) +axis(1,pos=0,c(0,3)) +p<-t.test(pheno$Zm00001d009374[pheno$subpop=='tropical'], + pheno$Zm00001d009374[pheno$subpop=='temperate'])$p.value +p<-signif(p,3) +boxplot(pheno$Zm00001d009374[pheno$subpop=='tropical'], + pheno$Zm00001d009374[pheno$subpop=='temperate'],outline=T,ylim=c(0,10), + col=npg_pal[3],las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,ylab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(italic('mcm4')~root~expression),2,line=2.2) +x<-rbind(1-af[,c('RAF.trop', 'RAF.temp')], + af[,c('RAF.trop', 'RAF.temp')]) +rownames(x)<-c(af[,'a2'],af[,'a1']) +bp<-barplot( as.matrix(x),col=npg_pal[2:1],xaxt='s',legend.text =T,las=1, + args.legend = list(x=1.3,y=1,xpd=T,bty='n',ncol=2,xjust=0.5,yjust=0), + xaxt='n',width = 0.6, space = 4/6, xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9,pos=0) +mtext(substitute(eQTL~(chr*x:y),list(x=af$chr,y=format(af$pos,big.mark=","))),2,line=2.5) +axis(1,pos=0,c(0,3)) +dev.off() + + +#### Aldolase +pdf(width = 12,height = 3,file = 'aldolase_sel.pdf') +gr<-genes[genes$gene_id=='Zm00001d049559'] +af<-snps[snps$snp.nam==cis1$snps[cis1$gene=='Zm00001d049559'],] +layout(matrix(rep(1:4,c(2,1,1,1)),nrow=1)) +par(mar=c(2.5,4,1.5,4),yaxs='i',cex=0.95,cex.axis=0.95,cex.main=0.95,bty='l') +manh(df1,log=F,cut=NULL,bty='u',ylab=NA,col="gray", + chr=4,xlim=c(34.8,35.4),type='l',ylim=c(0,15)) +rect(start(gr)/1e6,0,end(gr)/1e6,40,col=5,border = 5) +manh(df1,log=F,add=T,type='l',chr=4,col="gray") +manh(df1,log=F,add=T,type='p',chr=4,col="gray") +points(df2$pos[df2$chr==4]/1e6,df2$pval[df2$chr==4]*60,col="gray",type='l') +points(df2$pos[df2$chr==4]/1e6,df2$pval[df2$chr==4]*60,col="gray",type='p',cex=0.7, pch=4) +axis(4,at=seq(0,15,3),seq(0,15,3)/60,las=1) +mtext('XP-CLR',2,line=2.5) +mtext('Fst',4,line=2.5) +legend(35.1,15,legend=c('XP-CLR ','Fst'),pch=c(20,4),lty=1, + col='gray',bty='n',ncol=2,xpd=T,xjust = 0.5, yjust = 0) +mtext('chr4',1,at=34.73,line=0.5) +mtext('Mb',1,at=35.47,line=0.5) +pe<-signif(cis1$pvalue[cis1$gene=='Zm00001d049559'],3) +points(af$pos/1e6,0.5,pch=25,bg=2,col=2) +legend(34.8,15,legend=substitute(eQTL~(italic(P)==x),list(x=pe)),pch=25, + col=2,bty='n',pt.bg=2, text.col=2) + +par(mar=c(2,3,1,0.5)+0.5) +bp<-barplot(as.numeric(hb[hb$gene_id=='Zm00001d049559',c('HbTrop','HbTemp')]), + col=npg_pal[3],las=1,ylim=c(0,1),width = 0.6, space = 4/6, + las=1,xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(H^2~of~italic('aldolase')~expression),2,line=2.5) +axis(1,pos=0,c(0,3)) +p<-t.test(pheno$Zm00001d049559[pheno$subpop=='tropical'], + pheno$Zm00001d049559[pheno$subpop=='temperate'])$p.value +p<-signif(p, 3) +boxplot(pheno$Zm00001d049559[pheno$subpop=='tropical'], + pheno$Zm00001d049559[pheno$subpop=='temperate'],outline=T,ylim=c(0,5), + col=npg_pal[3],las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,ylab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(italic('aldolase')~root~expression),2,line=2.2) +x<-rbind(1-af[,c('RAF.trop', 'RAF.temp')], + af[,c('RAF.trop', 'RAF.temp')]) +rownames(x)<-c(af[,'a2'],af[,'a1']) +bp<-barplot( as.matrix(x),col=npg_pal[2:1],xaxt='s',legend.text =T, + args.legend = list(x=1.3,y=1,xpd=T,bty='n',ncol=2,xjust=0.5,yjust=0), + xaxt='n',width = 0.6, space = 4/6, xlim = c(0.2,2.2),las=1) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9,pos=0) +mtext(substitute(eQTL~(chr*x:y),list(x=af$chr,y=format(af$pos,big.mark=","))),2,line=2.5) +axis(1,pos=0,c(0,3)) +dev.off() + + +#### dsc1 +pdf(width = 12,height = 3,file = 'dsc1_sel.pdf') +gr<-genes[genes$gene_id=='Zm00001d049872'] +af<-snps[snps$snp.nam==cis1$snps[cis1$gene=='Zm00001d049872'],] +layout(matrix(rep(1:4,c(2,1,1,1)),nrow=1)) +par(mar=c(2.5,4,1.5,4),yaxs='i',cex=0.95,cex.axis=0.95,cex.main=0.95,bty='l') +manh(df1,log=F,cut=NULL,bty='u',ylab=NA,col="gray", + chr=4,xlim=c(49.4,50.0),type='l',ylim=c(0,15)) +rect(start(gr)/1e6,0,end(gr)/1e6,40,col=5,border = 5) +manh(df1,log=F,add=T,type='l',chr=4,col="gray") +manh(df1,log=F,add=T,type='p',chr=4,col="gray") +points(df2$pos[df2$chr==4]/1e6,df2$pval[df2$chr==4]*50,col="gray",type='l') +points(df2$pos[df2$chr==4]/1e6,df2$pval[df2$chr==4]*50,col="gray",type='p',cex=0.7, pch=4) +axis(4,at=seq(0,15,3),seq(0,15,3)/50,las=1) +mtext('XP-CLR',2,line=2.5) +mtext('Fst',4,line=2.5) +legend(49.7,15,legend=c('XP-CLR ','Fst'),pch=c(20,4),lty=1, + col='gray',bty='n',ncol=2,xpd=T,xjust = 0.5, yjust = 0) +mtext('chr4',1,at=49.33,line=0.5) +mtext('Mb',1,at=50.07,line=0.5) +pe<-signif(cis1$pvalue[cis1$gene=='Zm00001d049872'],3) +points(af$pos/1e6,0.5,pch=25,bg=2,col=2) +legend(49.4,13,legend=substitute(eQTL~(italic(P)==x),list(x=pe)),pch=25, + col=2,bty='n',pt.bg=2, text.col=2) + +par(mar=c(2,3,1,0.5)+0.5) +bp<-barplot(as.numeric(hb[hb$gene_id=='Zm00001d049872',c('HbTrop','HbTemp')]), + col=npg_pal[3],las=1,ylim=c(0,1),width = 0.6, space = 4/6, + las=1,xlim = c(0.2,2.2)) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(H^2~of~italic('dsc1')~expression),2,line=2.5) +axis(1,pos=0,c(0,3)) +p<-t.test(pheno$Zm00001d049872[pheno$subpop=='tropical'], + pheno$Zm00001d049872[pheno$subpop=='temperate'])$p.value +p <-signif(p, 3) +boxplot(pheno$Zm00001d049872[pheno$subpop=='tropical'], + pheno$Zm00001d049872[pheno$subpop=='temperate'],outline=T,ylim=c(0,5), + col=npg_pal[3],las=1, names=NULL,yaxs='i',xaxt='n',boxwex=0.6,medlwd=1, + xlab=NULL,ylab=NULL,main=bquote(italic(P)==.(p)) ) +axis(1,at=1:2,labels = c('Tropical','Temperate'),cex.axis=0.9) +mtext(expression(italic('dsc1')~root~expression),2,line=2.2) +x<-rbind(1-af[,c('RAF.trop', 'RAF.temp')], + af[,c('RAF.trop', 'RAF.temp')]) +rownames(x)<-c(af[,'a2'],af[,'a1']) +bp<-barplot( as.matrix(x),col=npg_pal[2:1],xaxt='s',legend.text =T, + args.legend = list(x=1.3,y=1,xpd=T,bty='n',ncol=2,xjust=0.5,yjust=0), + xaxt='n',width = 0.6, space = 4/6, xlim = c(0.2,2.2),las=1) +axis(1,at=bp,labels = c('Tropical','Temperate'),cex.axis=0.9,pos=0) +mtext(substitute(eQTL~(chr*x:y),list(x=af$chr,y=format(af$pos,big.mark=","))),2,line=2.5) +axis(1,pos=0,c(0,3)) +dev.off() + diff --git a/other/CallPeak.R b/other/CallPeak.R new file mode 100644 index 0000000..8df5a9c --- /dev/null +++ b/other/CallPeak.R @@ -0,0 +1,33 @@ +#### call peak from output of MatrixEQTL +CallPeak<-function(qtl,snps, wind=1e6, cutoff=0.05/nrow(snps),snpN=1,cores=1){ + require(parallel) + require(data.table) + qtl<-subset(qtl,pvalue<=cutoff) + if (nrow(qtl)==0) return(NULL) + res<-merge(data.table(qtl),data.table(snps)[,c('snps','chr','pos')],by='snps') + res<-split(res,res$gene) + res<-mclapply(res,function(x) { + chr<-split(x,x$chr) + chr<-lapply(chr, function(y) { + y<-y[order(pos)] + idx<-which(diff(y$pos)>wind) + idx.start<-c(1,idx+1) + idx.end<-c(idx,nrow(y)) + n<-idx.end-idx.start+1 + if (all(n=snpN], function(i) { + z<-y[seq(idx.start[i],idx.end[i])] + start<-min(z$pos); end<-max(z$pos) + distance<-end-start + snp_number<-nrow(z) + z<-z[which.min(pvalue)] + data.table(z,snp_number,start,end,distance) + })) + }) + chr<-rbindlist(chr) + if (nrow(chr)==0) return(NULL) + chr<-chr[order(pvalue)] + chr + },mc.cores = cores) + rbindlist(res) +} \ No newline at end of file diff --git a/other/FigS_eqtl.R b/other/FigS_eqtl.R new file mode 100644 index 0000000..40cb01f --- /dev/null +++ b/other/FigS_eqtl.R @@ -0,0 +1,183 @@ +#### function ggpie +ggpie<-function(value,start=pi/2,direction=1.1,rawNumber=TRUE,label.pos=1, + show.legend=T,legend.position='right',legend.title=NA, + legend.text=NA,label.size=4) { + require(ggplot2) + if (is.null(names(value))) { + group<-paste0('group',seq_along(value)) + }else { + group<-names(value) + } + value<-as.numeric(value) + data<-data.frame(value=value,group=group) + labels<- paste0(round(data$value/sum(data$value) * 100, 2), "%") + if (rawNumber) { + labels<- paste0(format(data$value,big.mark = ','),'\n(',labels,')') + } + p<- ggplot(data,aes(x=factor(1),y=value,fill=group),size=label.size*11/4) + + geom_bar(stat="identity",show.legend = show.legend,color='white') + + geom_text(label=labels,aes(x=label.pos),size=label.size, + position = position_stack(vjust = 0.5)) + + theme_void() + + coord_polar("y", start=start,direction=direction) + + theme(legend.position=legend.position, + legend.text = element_text(size=11*label.size/4), + legend.title = element_text(size=11*label.size/4), + legend.key.size = unit(label.size*1.2/4,'lines'), + panel.border = element_rect(fill=NA), + plot.margin = unit(rep(0.2,4), "lines")) + if (!missing(legend.text)) { + p<- p + scale_fill_discrete(name='group',labels=legend.text) + } + if (!missing(legend.title)) { + p<- p + guides(fill=guide_legend(title=legend.title)) + } + p +} + +#### +suppressPackageStartupMessages({ + library(ggplot2) + library(ggpubr) + library(cowplot) + library(data.table) + library(GenomicRanges) +}) +load('../../eGWAS340/eqtl_peak.rda') + +#### frequency +freq<-table(eqtls$gene) +eqtls<-eqtls[eqtls$gene%in%names(freq[freq<=10])] +eqtl1<-eqtls[eqtls$gene%in%names(freq[freq==1])] + +freq<-table(freq) +freq[11]<-sum(freq[-seq(10)]) +freq<-freq[seq(11)] +names(freq)[11]<-'>10' + +p1<-ggplot(data.frame(freq),aes(x=Var1,y=Freq))+ + geom_bar(stat="identity",width = 0.6,fill=c(rep("steelblue",10),'gray')) + + xlab('Number of eQTLs') + + ylab(expression("Number of genes")) + + theme_minimal(12) + + theme(axis.line = element_line(),panel.grid.major.x = element_blank(), + panel.grid.minor.y = element_blank(), + axis.text = element_text(colour = 'black',size=12), + axis.ticks = element_line(colour = 'black')) + + geom_text(aes(label=format(Freq,big.mark = ',')), vjust=-0.5,size=3.5) + + scale_y_continuous(n.breaks =8,limits = c(NA,max(freq)*1.05)) + + +p1.1<-ggpie(table(eqtl1$type),rawNumber = T, + legend.position = 'top', + legend.title = 'Single eQTL gene:', + label.pos = 1.1,label.size = 3,show.legend = T) + + theme(legend.text = element_text(face = 3)) + +#### scatter plot +chr<-read.table('chrLength.txt',header = T) +chr$cumsum<-cumsum(c(0,chr$length[-10])) +load('../../maize_genes.rda') +gene_pos<-genes[eqtls$gene] +eqtl_pos<-GRanges(eqtls$chr,eqtls$pos) +p<- -log10(eqtls$pvalue) +p[p>101]<-101 + +gene_pos<-start(gene_pos)+chr$cumsum[match(as.character(seqnames(gene_pos)),chr$chr)] +eqtl_pos<-start(eqtl_pos)+chr$cumsum[match(as.character(seqnames(eqtl_pos)),chr$chr)] +df<-data.frame(x=gene_pos,y=eqtl_pos,p=p) + +p2<-ggplot(df)+geom_point(aes(x=x,y=y),size=0.2,color='darkblue') + theme_classic() + + labs(x='Gene position (chromosome)',y='eQTL position (chromosome)', + color=bquote("-"*log[10]*(italic(P)))) + + scale_x_continuous(breaks = chr$length/2+chr$cumsum,label=1:10) + + scale_y_continuous(breaks = chr$length/2+chr$cumsum,label=1:10) + +#### pie plot +p3<-ggpie(table(eqtls$type),rawNumber = T,legend.position = c(0.5,0), + legend.title = NULL,label.pos = 1) + + ggtitle("eQTL types") + + theme(panel.border = element_blank(), + legend.text = element_text(face = 3), + legend.direction = 'horizontal', + plot.title = element_text(hjust = 0.5)) + +qtl_type<-table(eqtls$gene,eqtls$type) +qtl1<- sum(qtl_type[,1]>0 & qtl_type[,2]==0) +qtl2<- sum(qtl_type[,1]==0 & qtl_type[,2]>0) +qtl3<- sum(qtl_type[,1]>0 & qtl_type[,2]>0) +p4<-ggpie(c(qtl1,qtl2,qtl3),legend.position = c(0.5,0), + legend.title = NULL, label.pos = 1.1, + legend.text = c('cis','trans','cis+trans')) + + ggtitle("Gene (e-trait) types") + + theme(panel.border = element_blank(), + legend.text = element_text(face = 3), + legend.direction = 'horizontal', + plot.title = element_text(hjust = 0.5)) + +#### gene and eQTL distribution +p5<-paste0('chr',1:10) +for(i in 1:10) { + x<-c(start(genes[seqnames(genes)==i]),eqtls$pos[eqtls$chr==i]) + type<-rep(c('Genes','eQTLs'),c(sum(seqnames(genes)==i),sum(eqtls$chr==i))) + assign(p5[i],ggplot(data.frame(x=x,type=type),aes(x=x,y=after_stat(density))) + + geom_density(aes(fill=type,color=type),size=1,alpha=0.3) + + theme_cowplot() + + xlab(paste0('chr',i)) + + theme(axis.title.y = element_blank(), + axis.text = element_blank(), + legend.position = 'none') + ) +} + + +p6<-plot_grid(get(p5[1]), + get(p5[2]),get(p5[3]),get(p5[4]),get(p5[5]), + get(p5[6]),get(p5[7]),get(p5[8]),get(p5[9]),get(p5[10]), + nrow=2,ncol=5) +#### +p7 <- ggdensity(eqtls, x='MAF', color = "type", fill='type',alpha=0.5, + xlab = "Minor allele frequency", ylab='Density') + + theme(legend.text = element_text(face =3), legend.title = element_blank(), + legend.position = c(0.5, 0.9), legend.direction = 'horizontal') +p8 <- ggdensity(eqtls, x='PVE', color = "type", fill='type',alpha=0.5, + xlab = "eQTL PVE", ylab='Density') + + theme(legend.text = element_text(face =3), legend.title = element_blank(), + legend.position = c(0.5, 0.9), legend.direction = 'horizontal') + +### permutation +load('../../eGWAS340/SliceData_fpkm.rda') +load('../../eGWAS340/perm_res.rda') + +f <- factor(res$gene, levels = unlist(fpkm$rowNameSlices)) +pval <- tapply(res$pvalue, f, min) +pval[is.na(pval)] <- 1e-5 + +quantile(pval ,c(0.01,0.02,0.05)) + +snps <- data.table::fread("../../geno340/snps.bim") +perm0.05 <- -log10(quantile(pval, 0.05)) +cutoff <- -log10(0.05/nrow(snps)) + +p9 <- as_grob(function () { + par(mar=c(4, 4, 0.5, 0.5)) + plot(density(-log10(pval),from=5), xlim=c(5, 15), col=2, lwd=2, + xlab=bquote(-log[10](italic(P))), main=NA, las=1) + abline(v=perm0.05, col=2, lty=2, lwd=2) + lines(density(-log10(eqtls$pvalue), from=cutoff), col=4, lwd=2) + abline(v=cutoff, col=4, lty=2, lwd=2) + legend(10, 0.6, legend = c("Permutation", "eQTLs"), col=c(2,4), lwd=2,bty='n') + mtext(round(perm0.05,2), 1, col=2, at=perm0.05, line = 0.5, adj = 1, font=2) + mtext(round(cutoff,2), 1, col=4, at=cutoff, line = 0.5, adj = 0, font=2) +}) + +#### +pdf(width = 12,height = 8,file='../FigS_eqtl.pdf') +pr1 <- plot_grid(ggdraw(p1)+draw_grob(as_grob(p1.1),scale = 0.7,x=0.2,y=0.15), + p7, p8, rel_widths = c(1, 0.5, 0.5), labels = 'auto', + scale = 0.98, nrow=1) +pr2 <- plot_grid(p3, p4, p9, rel_widths = c(0.5, 0.5, 1), nrow=1, + labels = c('d','e','f'), scale = 0.98) +plot_grid(pr1, pr2, ncol=1) +dev.off() + diff --git a/other/FigS_sqtl.R b/other/FigS_sqtl.R new file mode 100644 index 0000000..2192d47 --- /dev/null +++ b/other/FigS_sqtl.R @@ -0,0 +1,118 @@ +#### +ggpie<-function(value,start=pi/2,direction=1.1,rawNumber=TRUE,label.pos=1, + show.legend=T,legend.position='right',legend.title=NA, + legend.text=NA) { + require(ggplot2) + if (is.null(names(value))) { + group<-paste0('group',seq_along(value)) + }else { + group<-names(value) + } + value<-as.numeric(value) + data<-data.frame(value=value,group=group) + labels<- paste0(round(data$value/sum(data$value) * 100, 2), "%") + if (rawNumber) { + labels<- paste0(format(data$value,big.mark = ','),'\n(',labels,')') + } + p<- ggplot(data,aes(x=factor(1),y=value,fill=group)) + + geom_bar(stat="identity",show.legend = show.legend,color='white') + + geom_text(label=labels,aes(x=label.pos), + position = position_stack(vjust = 0.5)) + + theme_void() + + coord_polar("y", start=start,direction=direction) + + theme(legend.position=legend.position, + legend.text = element_text(size=12), + panel.border = element_rect(fill=NA), + plot.margin = unit(rep(0.2,4), "lines")) + if (!missing(legend.text)) { + p<- p + scale_fill_discrete(name='group',labels=legend.text) + } + if (!missing(legend.title)) { + p<- p + guides(fill=guide_legend(title=legend.title)) + } + p +} + +#### +load('../../sqtl_peak.rda') +suppressPackageStartupMessages({ + library(ggplot2) + library(ggpubr) + library(data.table) +}) + + +freq<-table(sqtls$intron) +sqtls<-sqtls[sqtls$intron%in%names(freq[freq<=10])] + +freq<-table(freq) +freq[11]<-sum(freq[-seq(10)]) +freq<-freq[seq(11)] +names(freq)[11]<-'>10' + +p1<-ggplot(data.frame(freq),aes(x=Var1,y=Freq))+ + geom_bar(stat="identity",width = 0.6,fill=c(rep("steelblue",10),'gray')) + + xlab('Number of sQTLs') + + ylab(expression("Number of introns")) + + theme_minimal(12) + + theme(axis.line = element_line(),panel.grid.major.x = element_blank(), + panel.grid.minor.y = element_blank(), + axis.text = element_text(color=1, size=11), + axis.ticks = element_line(color=1)) + + geom_text(aes(label=format(Freq,big.mark = ',')), vjust=-0.5,size=2.8) + + scale_y_continuous(n.breaks =8,limits = c(NA,max(freq)*1.05)) + +#### +p2<-ggpie(table(sqtls$type),rawNumber = T,legend.position = c(0.5,0.05), + legend.title = NULL) + + ggtitle("sQTL types") + + theme(panel.border = element_blank(), + legend.text = element_text(face = 3), + legend.direction = "horizontal", + plot.title = element_text(hjust=0.5, vjust=0)) + +#### +qtl_type<-table(sqtls$intron,sqtls$type) +qtl1<- sum(qtl_type[,1]>0 & qtl_type[,2]==0) +qtl2<- sum(qtl_type[,1]==0 & qtl_type[,2]>0) +qtl3<- sum(qtl_type[,1]>0 & qtl_type[,2]>0) +p3<-ggpie(c(qtl1,qtl2,qtl3),legend.position = c(0.5,.05), + legend.title = NULL, legend.text = c('cis','trans','cis+trans')) + + ggtitle("Intron (splicing event) types") + + theme(panel.border = element_blank(), + legend.direction = "horizontal", + plot.title = element_text(hjust=0.5, vjust = 0), + legend.text = element_text(face = 3)) + +#### +p4 <- ggdensity(sqtls, x='PVE', color='type', fill='type', + xlab = 'sQTL PVE', ylab='Density') + + theme(legend.text = element_text(face =3), legend.title = element_blank(), + legend.position = c(0.5, 0.9), legend.direction = 'horizontal') + +df<-data.frame(sqtls[,c('type','PVE','R2')]) +lab<-paste0('italic(',c('cis','trans'),')*','"-sQTLs"') +p4.1<-ggplot(df,aes(x=type,y=PVE))+ + geom_violin(aes(fill=type),show.legend = FALSE,draw_quantiles=0.5) + + theme_light() + xlab(NULL) + ylab('PVE') + + scale_x_discrete(labels = parse(text = lab)) + + theme(axis.text.x = element_text(angle = 30,hjust = 1)) + +p4.2<- ggplot(df,aes(x=PVE*100,y=after_stat(density)))+ + geom_density(aes(fill=type,color=type),size=1,alpha=0.3) + + theme_classic() + xlab('PVE (%)') + labs(fill='sQTL type:',col='sQTL type:') + + scale_x_continuous(expand = expansion(0,0.05),limits = c(0,105)) + + scale_y_continuous(name='Density',expand = expansion(0,0)) + + theme(legend.position = c(0.7,0.75), + legend.text = element_text(face=3)) + +p5<- cowplot::plot_grid(p1, p4.2, align = "h", nrow = 1,labels = c(NA,'b'), + rel_widths = c(0.7,0.3), axis = 'bt') +p6<- cowplot::plot_grid(p2, p3, align = "h", nrow = 1,labels = c(NA,'d')) + +p7<- cowplot::plot_grid(p5, p6, nrow=2,labels =c('a','c'),scale=0.98) + +#### +pdf(width = 10,height = 7,file='../FigS_sqtl.pdf') +plot_grid(p1, p2, p4, p3, labels = 'auto') +dev.off() diff --git a/other/eqtl_perm.R b/other/eqtl_perm.R new file mode 100644 index 0000000..502811d --- /dev/null +++ b/other/eqtl_perm.R @@ -0,0 +1,73 @@ + +####### +suppressPackageStartupMessages({ + library(parallel) + library(MatrixEQTL) + library(data.table) + library(GenomicRanges) +}) + +load('SliceData_fpkm.rda') +load('../SlicedData_snps.rda') + +ls() +snps +m <- nrow(fpkm) +n <- ncol(fpkm) + +set.seed(2023) +snps$ColumnSubsample(sample(n)) +snps + +idx <- split(seq_len(m), cut(seq_len(m), 36, labels=F)) + +res <- pbapply::pblapply(idx, function(i) { + fpkm$RowReorder(i) + suppressMessages( + me <- Matrix_eQTL_engine( + snps = snps, + gene = fpkm, + cvrt = cvrt, + output_file_name = NULL, + pvOutputThreshold = 1e-5, + useModel = modelLINEAR, + errorCovariance = numeric(), + verbose = FALSE, + pvalue.hist = FALSE, + min.pv.by.genesnp = FALSE, + noFDRsaveMemory = FALSE + )) + me$all$eqtls +}, cl = 36) + +res <- do.call(rbind, res) + +save(res,file='perm_res.rda') + +#### +load('SliceData_fpkm.rda') +load('perm_res.rda') +load('eqtl_peak.rda') + +f <- factor(res$gene, levels = unlist(fpkm$rowNameSlices)) +pval <- tapply(res$pvalue, f, min) +pval[is.na(pval)] <- 1e-5 + +quantile(pval ,c(0.01,0.02,0.05)) + +snps <- data.table::fread("../geno340/snps.bim") +perm0.05 <- -log10(quantile(pval, 0.05)) +cutoff <- -log10(0.05/nrow(snps)) + +pdf(width = 7, height = 4, file='permutation.pdf') +par(mar=c(4, 4, 0.5, 0.5)) +plot(density(-log10(pval),from=5), xlim=c(5, 15), col=2, lwd=2, + xlab=bquote(-log[10](italic(P))), main=NA, las=1) +abline(v=perm0.05, col=2, lty=2, lwd=2) +lines(density(-log10(eqtls$pvalue), from=cutoff), col=4, lwd=2) +abline(v=cutoff, col=4, lty=2, lwd=2) +legend(12, 0.6, legend = c("Permutation", "eQTLs"), col=c(2,4), lwd=2,bty='n') +mtext(round(perm0.05,2), 1, col=2, at=perm0.05, line = 0.5, adj = 1, font=2) +mtext(round(cutoff,2), 1, col=4, at=cutoff, line = 0.5, adj = 0, font=2) +dev.off() + diff --git a/other/selection_analysis.R b/other/selection_analysis.R new file mode 100644 index 0000000..69e3218 --- /dev/null +++ b/other/selection_analysis.R @@ -0,0 +1,73 @@ + +#### +suppressPackageStartupMessages({ + library(data.table) + library(GenomicRanges) + library(parallel) +}) +source('../manh.R') +load('../maize_genes.rda') +load('../cis_eqtl_sqtl.rda') +load('../psi340_in_expr_genes.rda') +pp<-c('All','Tropical','Temperate','Before1980','After1990') +col<-c(2:6,RColorBrewer::brewer.pal(5,'Dark2')) + +#### pi +pi1<-fread('tropical.windowed.pi') +colnames(pi1)<-c('chr','start','end','snps.trop','pi.trop') +pi1<-GRanges(pi1) +pi2<-fread('temperate.windowed.pi') +colnames(pi2)<-c('chr','start','end','snps.temp','pi.temp') +pi2<-GRanges(pi2) +pi<-pi1[pi1%in%pi2] +pi$snps.temp<-pi2$snps.temp[match(pi,pi2)] +pi$pi.temp<-pi2$pi.temp[match(pi,pi2)] +pi$ratio<-pi$pi.trop/pi$pi.temp +cut_pi<-median(pi$ratio) # 0.9464664 +pi<-pi[pi$ratio>=cut_pi] +rm(pi1,pi2) + +#### xpclr +load('sel_res.rda') +xpclr_sel<-res[!is.na(res$xpclr)] +cut_xpclr<-quantile(xpclr_sel$xpclr,0.9) # 6.150673 +xpclr_sel$xpclr_norm<- (xpclr_sel$xpclr - mean(xpclr_sel$xpclr))/sd(xpclr_sel$xpclr) +xpclr_sel<-xpclr_sel[xpclr_sel%in%pi & xpclr_sel$xpclr>cut_xpclr] + +df1<-data.frame(chr=seqnames(res),pos=start(res),pval=res$xpclr) +manh(df1, cut = cut_xpclr,log=F,ylab='XP-CLR') + +xpclr_genes<-subsetByOverlaps(genes,xpclr_sel,type='within') +xpclr_genes$xpclr<-sapply(seq_along(xpclr_genes),function(i) { + mean(xpclr_sel$xpclr[xpclr_sel%over%xpclr_genes[i]]) +}) + +#### fst +fst_sel<-fread('region_fst.windowed.weir.fst') +colnames(fst_sel)<-c('chr','start','end','snps','w_fst','m_fst') +fst_sel<-GRanges(fst_sel) +cut_fst<-quantile(fst_sel$w_fst,0.9) # 0.1774624 + +df2<-data.frame(chr=seqnames(fst_sel),pos=start(fst_sel),pval=fst_sel$w_fst) +manh(df2, cut = cut_fst,log=F,ylab='Fst') + +fst_sel<-fst_sel[fst_sel%in%pi & fst_sel$w_fst>=cut_fst] +fst_genes<-subsetByOverlaps(genes,fst_sel,type='within') +fst_genes$fst<-sapply(seq_along(fst_genes),function(i) { + mean(fst_sel$w_fst[fst_sel%over%fst_genes[i]]) +}) + +#### selection genes +sel_genes<-genes[genes$gene_id%in%xpclr_genes$gene_id & + genes$gene_id%in%fst_genes$gene_id] +sel_genes$cis_eqtl<-cis1$snps[match(sel_genes$gene_id,cis1$gene)] +sel_genes$pval_eqtl<-cis1$pvalue[match(sel_genes$gene_id,cis1$gene)] +sel_genes$cis_sqtl<-cis2$snps[match(sel_genes$gene_id,cis2$gene)] +sel_genes$pval_sqtl<-cis2$pvalue[match(sel_genes$gene_id,cis2$gene)] +sel_genes$ld<-cis$ld[match(sel_genes$gene_id,cis$gene)] +sel_genes$xpclr<-xpclr_genes$xpclr[match(sel_genes$gene_id,xpclr_genes$gene_id)] +sel_genes$fst<-fst_genes$fst[match(sel_genes$gene_id,fst_genes$gene_id)] + +write.csv(sel_genes,'sel_genes_042121.csv') +save(pi,cut_pi,xpclr_sel,xpclr_genes, cut_xpclr, fst_sel,fst_genes,cut_fst, + sel_genes,df1, df2, file='sel_genes_042121.rda') diff --git a/other/xpclr.R b/other/xpclr.R new file mode 100644 index 0000000..6fe799d --- /dev/null +++ b/other/xpclr.R @@ -0,0 +1,48 @@ + +#### +suppressPackageStartupMessages({ + library(data.table) + library(GenomicRanges) + library(parallel) + load('gmap.rda') +}) + +snp<-fread('region_snp.bim') +colnames(snp)<-c('chr','snp','gpos','ppos','a1','a2') +snp<-split(snp,snp$chr) +res<-mclapply(seq(10),function(i) { + x<-snp[[i]] + x$gpos<-x$ppos*gmap$rate[i]/100 + ir<-IRanges(seq(1,max(x$ppos),0.9e6),width = 1e6) + end(ir)[length(ir)]<-max(x$ppos) + if (width(ir)[length(ir)]<0.1e6) { + ir<-ir[-length(ir)] + end(ir)[length(ir)]<-max(x$ppos) + } + for (j in seq_along(ir)) { + slice<-sprintf('chr%d_%s_%s',i,start(ir[j]),end(ir[j])) + cmd<-paste('plink --bfile region_snp --keep-allele-order --recode 01 transpose --output-missing-genotype 9', + sprintf("--keep-fam temperate.txt --chr %d --from-bp %s --to-bp %s --out geno_slices/%s", + i,start(ir[j]),end(ir[j]),slice)) + system(cmd) + system(sprintf("cut -d' ' -f 5- geno_slices/%s.tped > geno_slices/%s.geno1",slice,slice)) + + cmd<-paste('plink --bfile region_snp --keep-allele-order --recode 01 transpose --output-missing-genotype 9', + sprintf("--keep-fam tropical.txt --chr %d --from-bp %s --to-bp %s --out geno_slices/%s", + i,start(ir[j]),end(ir[j]),slice)) + system(cmd) + system(sprintf("cut -d' ' -f 5- geno_slices/%s.tped > geno_slices/%s.geno2",slice,slice)) + + write.table(x[x$ppos %within% ir[j],c(2:1,3:6)],sprintf('geno_slices/%s.snp',slice), + row.names = F,col.names = F,quote = F,sep='\t') + + cmd<-paste(sprintf("XPCLR -xpclr geno_slices/%s.geno1 geno_slices/%s.geno2 geno_slices/%s.snp out/%s", + slice, slice, slice, slice), + sprintf("-w1 0.0005 100 100 %d -p1 0.95",i)) + system(cmd) + } +},mc.cores=10) + + + +