Add files via upload

xiaoguanghuan123 · Jan 13, 2023 · b5c2b16 · b5c2b16
1 parent f5bbdd1
commit b5c2b16
Show file tree

Hide file tree

Showing 18 changed files with 2,449 additions and 0 deletions.
diff --git a/Fig1_code/Fig1.R b/Fig1_code/Fig1.R
@@ -0,0 +1,126 @@
+
+#######
+setwd('/work3/maize_eGWAS/figures/Fig1_code/')
+suppressPackageStartupMessages({
+    library(ggpubr)
+    library(cowplot)
+})
+
+npg<-ggsci::pal_npg('nrc')(10)
+hb<-read.csv('e340_Heritability.csv',row.names = 1)
+
+#### Hb histgram
+p1<-ggplot(hb,aes(Hb)) + 
+    stat_bin(breaks=seq(0,1,0.05),fill='gray',color=1) + 
+    theme_classic(12) + 
+    theme(axis.text = element_text(colour = 'black'),
+          axis.ticks = element_line(colour = 'black')) +
+    xlab(bquote(H^2)) + ylab('Number of genes') +
+    scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) +
+    scale_y_continuous(expand = expansion(c(0,0.05))) 
+
+#### Hb 2D density
+p2<-ggplot(hb,aes(log2GEMean,Hb)) + 
+    stat_density_2d(geom='polygon',aes(fill = after_stat(level)),
+                    breaks=seq(0,1,0.05)) +
+    scale_fill_distiller(palette=12, direction=1) +
+    xlab(bquote(log[2](FPKM))) + ylab(bquote(H^2)) +
+    theme_classic(12) +
+    theme(legend.position = c(0.9,0.8),legend.key.size = unit(12,'point'),
+          axis.text = element_text(colour = 'black'),
+          axis.ticks = element_line(colour = 'black')) +
+    scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,NA)) +
+    scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) 
+
+
+#### Volcano plot
+library(EnhancedVolcano)
+vol <- read.csv('Hb_GO_redundancyReduced.csv',row.names=1)
+head(vol)
+max(vol[which(vol$FDR<=0.05),"pValue"])
+
+BP = vol$desc[vol$Category =='Biological process']
+CC = vol$desc[vol$Category =='Cellular component']
+MF = vol$desc[vol$Category =='Molecular function']
+celltype1 <- BP
+celltype2 <- MF
+keyvals.shape <- ifelse(vol$desc %in% celltype1, 17,
+                        ifelse(vol$desc %in% celltype2, 19, 18))
+
+keyvals.shape[is.na(keyvals.shape)] <-18
+names(keyvals.shape)[keyvals.shape == 18] <- 'Cellular component'
+names(keyvals.shape)[keyvals.shape == 17] <- 'Biological process'
+names(keyvals.shape)[keyvals.shape == 19] <- 'Molecular function'
+volSig = subset(vol, abs(log2FoldChange)>log2(1.2) & FDR <=0.05) # & Category != 'Cellular component')
+rownames(volSig)
+
+p3 <- EnhancedVolcano(vol,lab = vol$desc,x = 'log2FoldChange',y = 'pValue',
+                     shapeCustom = keyvals.shape,
+                     pCutoff = 4.94e-05, FCcutoff = log2(1.1), colAlpha = .7,
+                     ylim = c(0, 12), xlim = c(-0.8,0.8),
+                     pointSize = 2,
+                     labSize = 2,
+                     caption = 'total = 981 GO terms',
+                     gridlines.major = T,gridlines.minor = F,
+                     title = NULL, subtitle = NULL,
+                     xlab = bquote(log[2]("fold change")),
+                     ylab = bquote(-log[10](italic(P))),
+                     borderWidth = 1.5,
+                     legendLabSize=9,
+                     legendIconSize = 2.5,
+                     drawConnectors = F) +
+    theme_classic(12)   +
+    scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,12)) +
+    guides(color = FALSE) + xlim(-1,0.6) +
+    theme(legend.position = 'top', legend.title = element_blank(),
+          legend.text = element_text(size=9),
+          axis.text = element_text(colour = 'black'),
+          axis.ticks = element_line(colour = 'black'))
+
+#### Hb_Trop_VS_Temperate
+df <- read.csv('Hb_Trop_VS_Temperate.csv',row.names = 1)
+df <- subset(df, HbTemp >0 & HbTrop > 0)
+
+p4<-ggplot(data = df, aes(HbTemp, HbTrop)) +
+    stat_density_2d(aes(fill = ..level..), geom='polygon') +
+    # geom_smooth(position = 'identity', method = lm) +
+    scale_fill_distiller(palette=12, direction=1) +
+    theme(legend.position='right')+
+    xlim(0,1) + ylim(0,1) +
+    xlab(bquote(H^2~"Tropical")) +
+    ylab(bquote(H^2~"Temperate")) +
+    theme_classic(12)
+# coef <- round(coef(lm(HbTemp~HbTrop, df)), 2)
+# p4 <- p4 + annotate("text", x=0.05,  y= 0.95, hjust=0, col=2, size=4, fontface="italic",
+#                    label=substitute(y==a*x+b, list(a=coef[2], b=coef[1]))) +
+#     theme(legend.position = c(0.9,0.3),legend.key.size = unit(12,'point'),
+#           axis.text = element_text(colour = 'black'),
+#           axis.ticks = element_line(colour = 'black')) +
+#     scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) +
+#     scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) 
+p4
+
+p5<-ggplot(data.frame(x=c(df$HbTemp, df$HbTrop), 
+                      y=rep(c("Temperate","Tropical"),each=nrow(df))),
+           aes(x, fill=y, color=y)) +
+    geom_density(alpha=0.3,lwd=1,show.legend = T) +
+    theme_classic(12)  +
+    labs(x=bquote(H^2), y='Density') +
+    scale_x_continuous(expand = expansion(c(0,0.05)),limits = c(0,1)) +
+    scale_y_continuous(expand = expansion(c(0,0.05)),limits = c(0,NA)) +
+    theme(legend.position = c(0.8,0.9),legend.title = element_blank(),
+          legend.key.size = unit(12,'point'), legend.text = element_text(size=8),
+          axis.text = element_text(colour = 'black'),
+          axis.ticks = element_line(colour = 'black')) 
+
+####
+pc1 <- plot_grid(p1,p2,align = 'v',ncol=1,labels = letters[1:2])
+pc2 <- plot_grid(p3,labels=letters[3])
+pc3 <- plot_grid(p4,p5,align = 'v',ncol=1,labels = letters[4:5])
+
+pdf(width = 10,height = 6, file='../Fig1.pdf')
+plot_grid(pc1,pc2,pc3,nrow=1,rel_widths = c(1,1.5))
+dev.off()
+
+
+
diff --git a/Fig1_code/Heritability.R b/Fig1_code/Heritability.R
@@ -0,0 +1,240 @@
+#factor:  
+options(stringsAsFactors = FALSE)
+rm(list=ls())
+gc()
+library('lme4')
+library('parallel')
+
+################### Heritability for Tropical lines ############################
+dfTrop <- read.csv('e340_trop_repUnmerged_Untransformed_fpkm.csv', header= TRUE, 
+                   check.names = FALSE, row.names = 1)
+
+
+###### only use duplicated genotypes #####
+cn = colnames(dfTrop)
+idx = colnames(dfTrop) %in% colnames(dfTrop)[duplicated(colnames(dfTrop))]
+dfTrop <- dfTrop[, idx]
+colnames(dfTrop) = cn[idx]
+length(unique(colnames(dfTrop)))
+group <- factor(colnames(dfTrop))
+geneList = rownames(dfTrop)
+
+############### Run mixed linear model ############################
+HbTrop <- pbapply::pblapply(seq_along(geneList),  function(i){
+  trait = geneList[i]
+  gexp = as.numeric(dfTrop[trait, ])
+  model = lmer(gexp ~ (1|group))
+  varComp <- as.data.frame(VarCorr(model,comp='vcov'))
+  sigmas <- varComp$vcov
+  H2 <- sigmas[1]/sum(sigmas)
+  H2
+}, cl=32)
+
+################### Heritability for Temperate lines ###########################
+dfTemp <- read.csv('e340_temp_repUnmerged_Untransformed_fpkm.csv', header= TRUE, check.names = FALSE, row.names = 1)
+#################### only use duplicated genotypes #############################
+cn = colnames(dfTemp)
+idx = colnames(dfTemp) %in% colnames(dfTemp)[duplicated(colnames(dfTemp))]
+dfTemp <- dfTemp[, idx]
+colnames(dfTemp) = cn[idx]
+length(unique(colnames(dfTemp)))
+group <- factor(colnames(dfTemp))
+geneList = rownames(dfTemp)
+################## Run the mixed linear model ##################################
+HbTemp <- pbapply::pblapply(seq_along(geneList),  function(i){
+  trait = geneList[i]
+  gexp = as.numeric(dfTemp[trait, ])
+  model = lmer(gexp ~ (1|group))
+  varComp <- as.data.frame(VarCorr(model,comp='vcov'))
+  sigmas <- varComp$vcov
+  H2 <- sigmas[1]/sum(sigmas)
+  H2
+}, cl=32)
+
+########################################### Combine ##################################
+library(ggplot2)
+library(grid)
+library(MASS) 
+library(reshape2) 
+library(reshape)
+HbTrop <- as.numeric(HbTrop)
+HbTemp <- as.numeric(HbTemp)
+dfHb_Comp = data.frame(HbTrop = HbTrop, HbTemp = HbTemp)
+rownames(dfHb_Comp) <- geneList
+write.csv(dfHb_Comp, file = 'Hb_Trop_VS_Temperate.csv')
+
+dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv', row.names = 1)
+dfHbTempLow <- subset(dfHb_Comp, dfHb_Comp$HbTrop*0.8 >= dfHb_Comp$HbTemp)
+write.csv(dfHbTempLow, file = 'TempHbLow.csv')
+write.csv(rownames(dfHbTempLow), file = 'HbTempLow.list', row.names = F,  quote = F)
+
+
+#### plot the 2d density plot #######
+dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv', row.names = 1)
+dfHb_Comp <- subset(dfHb_Comp, HbTemp >0 & HbTrop > 0)
+
+p1<- ggplot(data = dfHb_Comp, aes(HbTrop, HbTemp))+
+  stat_density_2d(aes(fill = ..level..), geom='polygon')+
+  geom_smooth(position = 'identity', method = lm)+
+  scale_fill_distiller(palette=12, direction=1) +
+  theme(legend.position='right')+
+  xlim(0,1) + ylim(0,1) +
+  xlab("H2 Tropical") +
+  ylab("H2 Temperate") +
+  theme_classic()
+
+coef <- round(coef(lm(HbTemp~HbTrop, dfHb_Comp)), 2)
+
+p1 = p1 + annotate("text", x=0.05,  y= 1, hjust=0, col="red", size=4, fontface="italic",
+                   label=substitute(y==a*x+b, list(a=coef[1], b=coef[2])))
+p1
+
+########## distribution of the tropical H2 
+dfHb_Comp <- read.csv('Hb_Trop_VS_Temperate.csv')
+dfHb_trop_high <- subset(dfHb_Comp, HbTrop > HbTemp)
+dfHb_trop_low <- subset(dfHb_Comp, HbTrop < HbTemp) 
+dfHb_trop_high$comparison = 'TropicalHigh'
+dfHb_trop_low$comparison = 'TropicalLow'
+dfHb_Comp_merge = rbind(dfHb_trop_high, dfHb_trop_low)
+
+ggplot(data = dfHb_Comp_merge, aes(HbTrop, fill = comparison)) +
+  geom_density(alpha=0.4) +
+  theme_classic()
+
+######################################################
+################### Heritability for all 340 lines ############################
+dfexp <- read.csv('eGWAS340_repUnmerged_Untransformed_fpkm.csv', header= TRUE, check.names = FALSE, row.names = 1)
+cn = colnames(dfexp)
+idx = colnames(dfexp) %in% colnames(dfexp)[duplicated(colnames(dfexp))]
+dfexp <- dfexp[, idx]
+colnames(dfexp) = cn[idx]
+length(unique(colnames(dfexp)))
+
+group <- factor(colnames(dfexp))
+gtList = unique(colnames(dfexp))
+geneList = rownames(dfexp)
+HbOutput <- pbapply::pblapply(seq_along(geneList),  function(i){
+  trait = geneList[i]
+  gexp = as.numeric(dfexp[trait, ])
+  model = lmer(gexp ~ (1|group))
+  varComp <- as.data.frame(VarCorr(model,comp='vcov'))
+  sigmas <- varComp$vcov
+  H2 <- sigmas[1]/sum(sigmas)
+  H2
+},cl=32)
+
+Hb = unlist(HbOutput)
+gexpMeanlog <- log2(rowMeans(dfexp)) #[sapply(dfexp,is.numeric)]))
+dfHb = data.frame(geneID = geneList, Hb = Hb, log2GEMean = gexpMeanlog)
+write.csv(dfHb, file = 'e340_Heritability.csv')
+
+#########################################################################################
+library(ggplot2)
+library("ggpubr")
+dfHb <- read.csv('e340_Heritability.csv', row.names = 1, header = T)
+p1 <- ggplot(dfHb, aes(x=Hb)) + 
+  geom_histogram(bins = 20, color='black', fill='grey') +
+  xlab(bquote(~H^2))+ 
+  ylab('Number of genes') +
+  theme_bw()+ 
+  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
+p1
+#p1 <- p1 + theme(axis.text.y = element_text(face = 'bold', size = 10, colour = 'black'), axis.text.x = element_text(face='bold', size = 10, colour = 'black'))
+#p1 <- p1 + theme(axis.title.y = element_text(face = 'bold', size=10,  colour = 'black'), title = element_text(face='bold', size=10, colour = 'black'))
+#p1 <- p1 + theme(axis.ticks.length = unit(.25,"cm"), axis.ticks = element_line(size=1,colour = 'black'))
+
+
+p2 <- ggplot(dfHb, aes(x=log2GEMean, y=Hb) ) +
+  stat_density_2d(aes(fill = ..level..), geom='polygon')+
+  scale_fill_distiller(palette=12, direction=1) +
+  scale_x_continuous(expand = c(0, 0)) +
+  scale_y_continuous(expand = c(0, 0)) +
+  theme(legend.position='right')+
+  xlab(bquote(~log[2](FPKM))) +
+  ylab(bquote(~H^2))
+p2
+#p2 <- p2 + theme(axis.text.y = element_text(face = 'bold', size = 10, colour = 'black'), axis.text.x = element_text(face='bold', size = 10, colour = 'black'))
+#p2 <- p2 + theme(axis.title.y = element_text(face = 'bold', size=10,  colour = 'black'), title = element_text(face='bold', size=10, colour = 'black'))
+#p2 <- p2 + theme(axis.ticks.length = unit(.15,"cm"), axis.ticks = element_line(size=0.5,colour = 'black'))
+library(ggrepel)
+library(gridExtra)
+grid.arrange(p1,p2, nrow=1)
+
+
+#################### Narrow sense heritability (use sommer) #################
+rm(list=ls())
+options(stringsAsFactors = FALSE)
+suppressPackageStartupMessages({
+    library(data.table)
+    library(pbapply)
+    library(snpStats)
+    library(sommer)
+})
+
+# get duplicated individuals
+dfexp <- fread("eGWAS340_repUnmerged_Untransformed_fpkm.csv",nrow=1)
+#group <- unique(colnames(dfexp))[-1]
+group <- unique(colnames(dfexp)[duplicated(colnames(dfexp))])
+length(group)
+
+# calculate additive relationship matrix
+plink <- read.plink("../../geno340/snps_prune.bed")
+snp <- as(plink$genotypes[plink$fam$pedigree %in% group, ], 'numeric')
+A <- A.mat(snp - 1)
+dim(A)
+
+# get gene expression matrix
+f <- fread("eGWAS340_RepMerged_Untransformed_fpkm.csv", data.table = F)
+f[1:5,1:5]
+phe <- as.matrix(f[, match(group, colnames(f))])
+rownames(phe) <- f$V1
+traits <- rownames(phe)
+
+# calculate h2
+dfh2 <- pblapply(seq_along(traits), function(i){
+    geneID <- traits[i]
+    df2 <- data.frame(lineName=group, gexp=phe[geneID, ]) 
+    q <-mmer(gexp ~ 1, random = ~vsr(lineName, Gu=A), 
+             rcov = ~units, data = df2, tolParInv = 1e-3,
+             verbose = FALSE)
+    a <- vpredict(q, h2 ~ V1/(V1 + V2) )
+    a$Estimate
+}, cl = 32)
+
+dfh2 <- data.frame(geneID = traits, h2 = unlist(dfh2))
+write.csv(dfh2, file='e340_narrow_hb.csv')
+
+######
+dfh2 <- read.csv(file = "e340_narrow_hb.csv", row.names = 1)
+dfHb <- read.csv(file = 'e340_Heritability.csv', row.names = 1)
+dfHb <- subset(dfHb, Hb < 1 & Hb > 0)
+dfh2 <- subset(dfh2, h2 < 1 & h2 > 0)
+
+dfh = merge(dfh2, dfHb, by = 'geneID')
+head(dfh)
+
+
+library(ggplot2)
+
+p1 <- ggplot(dfh, aes(x=Hb, y = h2))+
+  stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white") +
+  geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) +
+  xlab('Broad sense heritability') +
+  ylab('Narrow sense heritability') +
+  theme_classic()
+p1
+
+p2 <- ggplot(dfh, aes(x=Hb, y = h2)) + 
+  geom_point() + 
+  geom_smooth(method = lm, color = "red", fill = "#69b3a2", se=TRUE) +
+  xlab('Broad sense heritability') +
+  ylab('Narrow sense heritability') +
+  theme_classic()
+p2
+
+
+
+
+
+