Skip to content

Commit

Permalink
Issue #133, moved kernel.dist() to treekernel.R
Browse files Browse the repository at this point in the history
Deleted deprecated config parsing code from smcConfig.R
Eliminated caching of "self" kernel scores to trees in treekernel.R
  • Loading branch information
ArtPoon committed Feb 23, 2018
1 parent 426f2be commit 78cf900
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 81 deletions.
2 changes: 1 addition & 1 deletion pkg/R/processtree.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ parse.newick <- function(tree) {
stop(".preprocess.tree() requires phylo or character (Newick) object for tree")
}
tree <- ladderize(tree)
tree <- .rescale.tree(tree, config$rescale.mode)
#tree <- .rescale.tree(tree, config$rescale.mode)

return(tree)
}
Expand Down
38 changes: 0 additions & 38 deletions pkg/R/smcABC.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,44 +48,6 @@ simulate.trees <- function(workspace, theta, model, seed=NA, ...) {
}


# formally 'distance'
kernel.dist <- function(t1, t2, decay.factor, rbf.variance, sst.control, rescale.mode, labelPattern, labelReplacement, gamma) {
if (is.null(t1$kernel)) {
stop("t1 missing self kernel in distance()")
}
if (is.null(t2$kernel)) {
stop("t2 missing self kernel in distance()")
}

k <- tree.kernel(
t1,
t2,
lambda=decay.factor,
sigma=rbf.variance,
rho=sst.control,
regexPattern = labelPattern,
regexReplacement = labelReplacement,
gamma=gamma
)

result <- 1. - k / sqrt(t1$kernel * t2$kernel)
if (result < 0 || result > 1) {
stop(
cat("ERROR: kernel.dist() value outside range [0,1].\n",
"k: ", k, "\n",
"t1$kernel: ", t1$kernel, "\n",
"t2$kernel: ", t2$kernel, "\n"
)
)
}
if (is.nan(result)) {
cat("t1$kernel:", t1$kernel, "\n")
cat("t2$kernel:", t2$kernel, "\n")
}
return (result)
}


# Applies config$dist expression to trees x and y
distance <- function(x, y, config) {

Expand Down
43 changes: 1 addition & 42 deletions pkg/R/smcConfig.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,51 +113,10 @@ load.config <- function(file) {

# Parse & validate distance expression
config$dist <- parse.distance(settings$distances)

# Parse Kernel Settings
if (is.list(settings$distances)) {
if (is.element('kernel.dist', names(settings$distances))) {
kernel.settings <- settings$distances[['kernel.dist']]
config$decay.factor <- kernel.settings$decay.factor
config$rbf.variance <- kernel.settings$rbf.variance
config$sst.control <- kernel.settings$sst.control
config$rescale.mode <- kernel.settings$rescale.mode
config$labelPattern <- kernel.settings$labelPattern
config$labelReplacement <- kernel.settings$labelReplacement
config$gamma <- kernel.settings$gamma
}
} else if (is.character(settings$distances)) {
# parse kernel settings from string
dist.list <- strsplit(settings$distances, "+", fixed=TRUE)[[1]]
for (dist in dist.list) {
if (grepl("kernel.dist", dist)) {
match <- regexpr("\\(.+\\)", dist, perl=TRUE)
args <- regmatches(dist, match)
args <- gsub("[( )]", "", args)
kernel.settings <- strsplit(args, ",", fixed=TRUE)[[1]]
names <- c()
values <- c()
for (parm in kernel.settings) {
split <- strsplit(parm, "=", fixed=TRUE)[[1]]
name <- split[1]
value <- split[2]
names <- c(names, name)
values <- c(values, value)
}
names(values) <- names
config$decay.factor <- as.numeric(values["decay.factor"])
config$rbf.variance <- as.numeric(values["rbf.variance"])
config$sst.control <- as.numeric(values["sst.control"])
config$rescale.mode <- values["rescale.mode"]
config$labelPattern <- values["labelPattern"]
config$labelReplacement <- values["labelReplacement"]
config$gamma <- as.numeric(values["gamma"])
}
}
}
return (config)
}


parse.distance <- function(distance) {
# generate matrix of accepted tree statistic functions from 'metrics' list which can be added to over time without altering the rest of the function
# if value of 1, only one variable required in distance function call (ie. sackin(x) - sackin(y))
Expand Down
68 changes: 68 additions & 0 deletions pkg/R/treekernel.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,74 @@
# return(result)
# }


# formally 'distance'
kernel.dist <- function(t1, t2, decay.factor, rbf.variance, sst.control, rescale.mode, labelPattern, labelReplacement, gamma) {
if (is.null(t1$kernel)) {
stop("t1 missing self kernel in distance()")
}
if (is.null(t2$kernel)) {
stop("t2 missing self kernel in distance()")
}

# rescale branch lengths
nt1 <- .rescale.tree(t1, rescale.mode)
nt2 <- .rescale.tree(t2, rescale.mode)

k12 <- tree.kernel(
nt1,
nt2,
lambda=decay.factor,
sigma=rbf.variance,
rho=sst.control,
regexPattern = labelPattern,
regexReplacement = labelReplacement,
gamma=gamma
)

# we can no longer cache a tree's kernel score to itself because a distance may potentially
# comprise more than one kernel
k11 <- tree.kernel(
nt1,
nt1,
lambda=decay.factor,
sigma=rbf.variance,
rho=sst.control,
regexPattern = labelPattern,
regexReplacement = labelReplacement,
gamma=gamma
)

k22 <- tree.kernel(
nt2,
nt2,
lambda=decay.factor,
sigma=rbf.variance,
rho=sst.control,
regexPattern = labelPattern,
regexReplacement = labelReplacement,
gamma=gamma
)

#result <- 1. - k / sqrt(t1$kernel * t2$kernel)
result <- 1. - k12 / sqrt(k11 * k22)
if (result < 0 || result > 1) {
stop(
cat("ERROR: kernel.dist() value outside range [0,1].\n",
"k12: ", k12, "\n",
"k11: ", k11, "\n",
"k22: ", k22, "\n"
)
)
}
if (is.nan(result)) {
cat("k11:", k11, "\n")
cat("k22:", k22, "\n")
}
return (result)
}


tree.kernel <- function(tree1, tree2,
lambda, # decay factor
sigma, # RBF variance parameter
Expand Down

0 comments on commit 78cf900

Please sign in to comment.