1
+ library(matrixStats )
2
+ library(lsa )
3
+ library(ape )
4
+ library(parallel )
5
+ library(picante )
6
+
7
+ # ############
8
+ # Estimate NJ and bootstrapped NJ trees
9
+ # ############
10
+
11
+ # read in data
12
+ dat <- read.table(' data/orthogroups.TMM.EXPR.matrix' ,sep = ' \t ' , header = T , row.names = 1 )
13
+ # square-root transform
14
+ dat <- as.matrix(sqrt(dat ))
15
+
16
+ # take the mean expression value for each species. Unfortunately this code is very specific to this dataset
17
+ ccryp <- dat [,1 ]
18
+ cdubi <- rowMeans2(dat , cols = c(2 : 4 ))
19
+ cgray <- rowMeans2(dat , cols = c(5 : 7 ))
20
+ chamu <- rowMeans2(dat , cols = c(8 : 10 ))
21
+ cnert <- rowMeans2(dat , cols = c(11 : 13 ))
22
+ crust <- rowMeans2(dat , cols = c(14 : 15 ))
23
+ cseto <- rowMeans2(dat , cols = c(16 : 18 ))
24
+ ctene <- rowMeans2(dat , cols = c(19 : 21 ))
25
+ oaust <- rowMeans2(dat , cols = c(22 : 24 ))
26
+ oinco <- rowMeans2(dat , cols = c(25 : 26 ))
27
+ pfall <- rowMeans2(dat , cols = c(27 : 29 ))
28
+ phors <- rowMeans2(dat , cols = c(30 : 31 ))
29
+ pluci <- rowMeans2(dat , cols = c(32 : 33 ))
30
+ ppall <- dat [,34 ]
31
+
32
+ # create a new data matrix from the species means
33
+ dat <- data.frame (CCRYP = ccryp ,CDUBI = cdubi , CGRAY = cgray ,CHAMU = chamu ,CNERT = cnert ,CRUST = crust ,CSETO = cseto ,CTENE = ctene ,OAUST = oaust ,OINCO = oinco ,PFALL = pfall ,PHORS = phors ,PLUCI = pluci ,PPALL = ppall )
34
+ dat <- as.matrix(dat )
35
+ # create a distance matrix using pairwise peason distances
36
+ mat <- as.dist(1 - cor(dat ,method = ' pearson' ))
37
+ # generate the NJ tree
38
+ mynj <- nj(mat )
39
+
40
+ # generate the 10000 boostrapp NJ trees
41
+ dat <- t(dat )
42
+ bstrees <- boot.phylo(phy = mynj , x = dat , FUN = function (xx ) nj(1 - cor(t(xx ),method = ' pearson' )), B = 10000 , trees = T , mc.cores = 4 )$ trees
43
+
44
+ write.tree(bstrees ,' boot.pearson.species.tre' )
0 commit comments