-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlimma_voom_gvex.scripts.txt
106 lines (76 loc) · 3.88 KB
/
limma_voom_gvex.scripts.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
##################### BRAIN-GVEX #####################
## Process data
library(magrittr)
path <- "/Users/ibrahiaa/rnaseq_brain_data_sets/PsychENCODE/asbjørn_si_score/DE_OFC_5sets_TSD/"
clinMeta <- read.csv(paste0(path," info_GVEX.txt"), sep="\t", header=T)
count <- read.csv(paste0(path," count_GVEX.txt"), sep=",", header=T, check.names=F)
colnames(count)[1] <- "gene_id"
id <- subset(clinMeta, Study=="SMRI" & PrimaryDiagnosis %in% c("BD","HC") & Ethnicity =="CAUC")$SampleID %>% unique
data <- count[,c("gene_id",id)]
data$gene_id <- gsub("\\..*","",data$gene_id)
data <- data[ duplicated(data$gene_id) == F,]
rownames(data) <- data$gene_id
data <- data[,-1]
#data <- round(data) # round data since RSEM output may be non-integer
colData <- clinMeta
rownames(colData) <- colData$SampleID
colData <- colData[colnames(data),]
all( rownames(colData) == colnames(data))
colData[,c(2,4,5,6)] <- lapply(colData[,c(2,4,5,6)],as.factor)
colData[,c(3,7,8)] <- lapply(colData[,c(3,7,8)],as.numeric)
colData$PrimaryDiagnosis <- relevel(colData$PrimaryDiagnosis, ref="HC")
colData.filt <- subset(colData, SampleID %in% id)
data <- data[,rownames(colData.filt)]
all( rownames(colData.filt) == colnames(data))
library(biomaRt)
ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl", host="useast.ensembl.org")
genemap <- getBM(attributes=c("entrezgene_id","ensembl_gene_id", "external_gene_name", "gene_biotype", "chromosome_name", "start_position", "end_position"), filters="ensembl_gene_id",
values=rownames(data), mart=ensembl)
genemap <- genemap[ duplicated(genemap$ensembl_gene_id) == F,]
rownames(genemap) <- genemap$ensembl_gene_id
## Remove lowly expressed genes
library(edgeR)
keep <- filterByExpr(data, design=colData.filt$PrimaryDiagnosis)
data.filt <- data[keep,]
## Keep only protein-coding and lncRNA genes
genemap2 <- genemap[rownames(data.filt),]
genemap2 <- subset(genemap2, gene_biotype %in% c("protein_coding","lncRNA"))
data.filt <- data.filt[rownames(genemap2),]
## DE analyses
library(limma)
y <- DGEList(counts = data.filt, genes = genemap2, samples=colData.filt)
y <- calcNormFactors(y)
design <- model.matrix(~ PrimaryDiagnosis + ReportedGender + Age + RIN + PMI, data = colData.filt)
v <- voomWithQualityWeights(y, design, plot = TRUE)
fit <- lmFit(v, design)
fit <- eBayes(fit)
summary(decideTests(fit, p.value=0.1))
res <- topTable(fit, coef="PrimaryDiagnosisBD", n=Inf, p.value=1)
write.table(res, "/Users/ibrahiaa/rnaseq_brain_data_sets/PsychENCODE/asbjørn_si_score/de_limma_gvex_smri.txt", sep="\t", row.names=F)
## Plot significant genes
library(ggsci)
library(ggpubr)
col <- get_palette("aaas", 10)
df <- cpm(y, log=TRUE, prior.count=3) %>% t %>% data.frame
df <- df[ , rownames(head(res,10)) ]
colnames(df) <- head(res,10)$external_gene_name
df <- merge(df,colData.filt,by=0)
glist <- lapply(2:11, function (x) {
ggplot() +
geom_boxplot(data=df,aes(x = PrimaryDiagnosis, y = df[,x]), color ="black",size=0.7, outlier.shape=NA, fill="white") +
geom_point(data=df,aes(x = PrimaryDiagnosis, y = df[,x], color = PrimaryDiagnosis, fill=PrimaryDiagnosis), position=position_jitterdodge(), pch=21,size=3.5, color="black", alpha=0.85) +
labs(title=colnames(df)[x],x="",y="") +
scale_colour_manual(values=col[c(2,9,8)]) +
scale_fill_manual(values=col[c(2,9,8)]) +
theme_classic() +
theme(axis.text.x=element_text(size=20,color="black", angle=0, hjust=0.5),
axis.text.y=element_text(size=20,color="black"),
axis.title=element_text(size=16),
plot.title=element_text(size=20,hjust=0.5),
legend.text=element_text(size=20),
legend.title=element_blank(),
plot.margin=unit(c(0,0,0,0),"cm"),
panel.background=element_rect(linetype="solid", color="black", size=1))
})
annotate_figure(ggarrange(plotlist=glist,ncol=5,nrow=2,common.legend=T,legend="none"),
left=text_grob("Normalized expression",size=20,rot=90))