-
Notifications
You must be signed in to change notification settings - Fork 0
/
PGSEA.R
91 lines (79 loc) · 3.27 KB
/
PGSEA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Load the required libraries
library(SSOAP) #To use WikiPathways web service
library(XML)
library(GEOquery) #To download GEO experiments
library(GSEABase) #Gene Set data structures
library(PGSEA) #Parametric Gene Set Enrichment
library(limma) #Used for mining the GSEA results
# Create a SOAPServer instance for the web service
srv = SOAPServer("http://www.wikipathways.org/wpi/webservice/webservice.php");
# Get a list of all pathways from WikiPathways
reply = .SOAP(
srv, "listPathways",
action=I("listPathways"), handlers=NULL
)
# Parse the xml document
doc = xmlParse(reply$content, asText=TRUE)
# Extract the pathway info
pathwayNodes = xmlElementsByTagName(xmlRoot(doc), "pathways", TRUE)
pathways = lapply(pathwayNodes, function(n) {
children = xmlChildren(n, addNames= TRUE)
if(xmlValue(children$species) == "Homo sapiens") {
p = list()
p[["id"]] = xmlValue(children$id)
p[["name"]] = xmlValue(children$name)
p[["species"]] = xmlValue(children$species)
p[["url"]] = xmlValue(children$url)
return(p)
} else {
return() # Skip non-human pathways
}
})
# Remove NULL entries (non-human pathways)
pathways = pathways[!sapply(pathways, is.null)]
# A function that downloads a GeneSet for a pathway
createGS = function(p) {
print(p[["id"]])
# Download the gene list (translated to Entrez ids)
reply = .SOAP(srv, "getXrefList", pwId = p[["id"]], code="L",
action=I("getXrefList"), handlers=NULL)
doc = xmlParse(reply$content, asText=TRUE)
# Find the xref nodes with an xpath query
resultNodes = xmlElementsByTagName(xmlRoot(doc), "xrefs", TRUE)
# Extract the gene ids
geneIds = sapply(resultNodes, xmlValue)
if(length(geneIds) > 0) { # Skip empty lists
# Create a GeneSet object
geneSet = GeneSet(geneIds, geneIdType=EntrezIdentifier(),
setName=paste(p[["id"]], " (", p[["name"]], ")", sep="")
)
return(geneSet)
}
}
geneSets = lapply(pathways, createGS) #Apply the createGS function on all pathways
geneSets = geneSets[!sapply(geneSets, is.null)] #Remove empty sets
geneSetCollection = GeneSetCollection(geneSets)
# Save the downloaded gene sets, so we can use them for later calculations
save(geneSetCollection, file="/home/thomas/code/webservice-examples/Hs_genesets.Rd")
# Get an expression set from GEO
gse = getGEO("GSE7023", GSEMatrix = TRUE)
eset = gse[[1]]
# Reformat the phenotype labels
subtype = gsub("subtype: ", "", phenoData(eset)$characteristics_ch1)
subtype = gsub("\\.", "_", subtype)
# Run the PGSEA calculation (reference sample is 'NO')
pg = PGSEA(eset, geneSetCollection, ref = which(subtype == "NO"), p.value=0.005)
# Plot the result matrix
smcPlot(
pg, factor(subtype), scale=c(-15,15), show.grid = TRUE, margins = c(1, 1, 4, 10), col = .rwb, r.cex = 0.35
)
# Find the pathways most different between P1 and P2B
pgNF <- PGSEA(eset, geneSetCollection, ref = which(subtype == "NO"), p.value = NA)
design <- model.matrix(~-1 + factor(subtype))
colnames(design) <- names(table(subtype))
fit <- lmFit(pgNF, design)
contrast.matrix <- makeContrasts(P2B - P1, levels = design)
fit <- contrasts.fit(fit, contrast.matrix)
fit <- eBayes(fit)
# Plot the top 25
smcPlot(pg[as.numeric(rownames(topTable(fit, n = 30, resort.by = "logFC"))),], factor(subtype, levels = c("P1", "P2B")), col = .rwb, scale = c(-15, 15), margins = c(1, 1, 4, 10), show.grid = TRUE, r.cex = 0.75)