From 01a92c2c5e8b24f8656ecdbe82de24278b61bc51 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Mon, 18 Apr 2022 14:52:57 +0100 Subject: [PATCH] Create si_predict/si_calculate for #5 --- NAMESPACE | 3 +- R/si_model.R | 39 ++++++++--- README.md | 4 +- README.qmd | 4 +- data-raw/si_model.R | 97 ++++++++++++++++++++++++++++ man/{si_model.Rd => si_calculate.Rd} | 18 +++--- man/si_predict.Rd | 30 +++++++++ vignettes/si.Rmd | 4 +- 8 files changed, 174 insertions(+), 25 deletions(-) create mode 100644 data-raw/si_model.R rename man/{si_model.Rd => si_calculate.Rd} (71%) create mode 100644 man/si_predict.Rd diff --git a/NAMESPACE b/NAMESPACE index 4150e82..f55edca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(si_model) +export(si_calculate) +export(si_predict) export(si_to_od) importFrom(rlang,.data) diff --git a/R/si_model.R b/R/si_model.R index 92afa49..8ce6e36 100644 --- a/R/si_model.R +++ b/R/si_model.R @@ -1,4 +1,4 @@ -#' Run a spatial interaction model +#' Calculate flow using a pre-existing function #' #' Executes a spatial interaction model based on an OD data frame #' and user-specified function @@ -7,7 +7,7 @@ #' [si_to_od()] #' @param fun A function that calculates the interaction (e.g. the number of trips) #' between each OD pair -#' @param var_p Use this argument to run a 'production constrained' SIM. +#' @param constraint_p Use this argument to run a 'production constrained' SIM. #' Character string corresponding to column in the `od` #' dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs #' such that the total for each zone of origin cannot go above this value. @@ -17,22 +17,43 @@ #' @examples #' od = si_to_od(si_zones, si_zones, max_dist = 4000) #' fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000) -#' od_dd = si_model(od, fun = fun_dd) +#' od_dd = si_calculate(od, fun = fun_dd) #' plot(od$distance_euclidean, od_dd$res) #' fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000) -#' od_output = si_model(od, fun = fun, beta = 0.3, O = "origin_all", +#' od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", #' n = "destination_all", d = "distance_euclidean") #' head(od_output) #' plot(od$distance_euclidean, od_output$res) -#' od_pconst = si_model(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", -#' d = "distance_euclidean", var_p = origin_all) +#' od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", +#' d = "distance_euclidean", constraint_p = origin_all) #' plot(od_pconst$distance_euclidean, od_pconst$res) #' plot(od_pconst["res"], logz = TRUE) -si_model = function(od, fun, var_p, ...) { +si_calculate = function(od, fun, constraint_p, ...) { od$res = fun(od, ...) - if (!missing(var_p)) { + if (!missing(constraint_p)) { od_grouped = dplyr::group_by(od, .data$O) - od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} )) + od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{constraint_p}} )) + od = dplyr::ungroup(od_grouped) + } + od +} +#' Predict si model based on pre-trained model +#' +#' @param model A model object, e.g. from [lm()] or [glm()] +#' @inheritParams si_calculate +#' @seealso si_calculate +#' @export +#' @examples +#' od = si_to_od(si_zones, si_zones, max_dist = 4000) +#' m = lm(od$origin_all ~ od$origin_bicycle) +#' od_updated = si_predict(od, m) +si_predict = function(od, model, constraint_p) { + od$res = stats::predict(model, od) + if (!missing(constraint_p)) { + od_grouped = dplyr::group_by(od, .data$O) + od_grouped = dplyr::mutate( + od_grouped, res = .data$res / sum(.data$res) * mean( {{constraint_p}} ) + ) od = dplyr::ungroup(od_grouped) } od diff --git a/README.md b/README.md index b367c3b..0d59c9c 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ gravity_model = function(od, beta) { exp(-beta * od[["distance_euclidean"]] / 1000) } # perform SIM -od_res = si_model(od, fun = gravity_model, beta = 0.3, var_p = origin_all) +od_res = si_calculate(od, fun = gravity_model, beta = 0.3, var_p = origin_all) # visualize the results plot(od_res$distance_euclidean, od_res$res) ``` @@ -62,7 +62,7 @@ approaches to be implemented. This flexibility is a key aspect of the package, enabling small and easily modified functions to be implemented and tested. -The output `si_model()` is a geographic object which can be plotted as a +The output `si_calculate)` is a geographic object which can be plotted as a map: ``` r diff --git a/README.qmd b/README.qmd index 93c968e..83fb780 100644 --- a/README.qmd +++ b/README.qmd @@ -50,7 +50,7 @@ gravity_model = function(od, beta) { exp(-beta * od[["distance_euclidean"]] / 1000) } # perform SIM -od_res = si_model(od, fun = gravity_model, beta = 0.3, var_p = origin_all) +od_res = si_calculate(od, fun = gravity_model, beta = 0.3, var_p = origin_all) # visualize the results plot(od_res$distance_euclidean, od_res$res) ``` @@ -60,7 +60,7 @@ As the example above shows, the package allows/encourages you to define and use The resulting estimates of interaction, returned in the column `res` and plotted with distance in the graphic above, resulted from our choice of spatial interaction model inputs, allowing a wide range of alternative approaches to be implemented. This flexibility is a key aspect of the package, enabling small and easily modified functions to be implemented and tested. -The output `si_model()` is a geographic object which can be plotted as a map: +The output `si_calculate)` is a geographic object which can be plotted as a map: ```{r map} plot(od_res["res"], logz = TRUE) diff --git a/data-raw/si_model.R b/data-raw/si_model.R new file mode 100644 index 0000000..4f7ef5d --- /dev/null +++ b/data-raw/si_model.R @@ -0,0 +1,97 @@ +# Aim: test different configurations for si_model/predict +devtools::load_all() +remotes::install_cran() + +# What we have currently: +od = si_to_od(si_zones, si_zones, max_dist = 4000) +fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000) +od_dd = si_calculate(od, fun = fun_dd) +plot(od$distance_euclidean, od_dd$res) +fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000) +od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", + n = "destination_all", d = "distance_euclidean") +head(od_output) +plot(od$distance_euclidean, od_output$res) +od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", + d = "distance_euclidean", var_p = origin_all) +plot(od_pconst$distance_euclidean, od_pconst$res) +plot(od_pconst["res"], logz = TRUE) +si_model = function(od, fun, var_p, ...) { + od$res = fun(od, ...) + if (!missing(var_p)) { + od_grouped = dplyr::group_by(od, .data$O) + od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} )) + od = dplyr::ungroup(od_grouped) + } + od +} + +# Option 1 +od = si_to_od(si_zones, si_zones, max_dist = 4000) +fun_dd = function(d, beta = 0.3) exp(-beta * d / 1000) +od_dd = si_calculate(od, fun = fun_dd, d = distance_euclidean) +plot(od$distance_euclidean, od_dd$res) +fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000) +od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", + n = "destination_all", d = "distance_euclidean") +head(od_output) +plot(od$distance_euclidean, od_output$res) +od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", + d = "distance_euclidean", var_p = origin_all) +plot(od_pconst$distance_euclidean, od_pconst$res) +plot(od_pconst["res"], logz = TRUE) +si_model = function(od, fun, var_p, ...) { + browser() + res = fun(...) + od = dplyr::mutate(od, res = fun(...)) + od$res = fun(od, ...) + if (!missing(var_p)) { + od_grouped = dplyr::group_by(od, .data$O) + od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} )) + od = dplyr::ungroup(od_grouped) + } + od +} + + +si_dd_exp = function(d, beta) { + exp(d * -beta) +} + +si_dd_exp(d = od$distance_euclidean, beta = 0.2) +dplyr::mutate(od, + res = si_dd_exp(distance_euclidean, beta = 0.2) + ) + +si_dd_exp = function(d, beta = 0.1) { + exp({{d}} * -beta) +} + +dplyr::mutate(od, + res = si_dd_exp(distance_euclidean, beta = 0.2) + ) + +si_model_tidy = function(od, fun, ...) { + dplyr::mutate(res = fun(dplyr::vars(...))) +} + +si_model_tidy(od, si_dd_exp, beta = 0.1, d = distance_euclidean) + + +f = flow ~ exp(distance_euclidean * -b) +f = as.formula(y ~ exp(distance_euclidean * -b)) +with(od, f) +si_model_standard = function(od, fun, formula, output = "flow") { + res = with(od, fun(...)) + res +} +si_model_standard(od) + +si_dd_exp = function(d, beta = 0.1) { + exp(d * -beta) +} + + +si_model_standard(od, si_dd_exp, beta = 0.2, d = "distance_euclidean") + + diff --git a/man/si_model.Rd b/man/si_calculate.Rd similarity index 71% rename from man/si_model.Rd rename to man/si_calculate.Rd index 7ed37cf..7692af3 100644 --- a/man/si_model.Rd +++ b/man/si_calculate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/si_model.R -\name{si_model} -\alias{si_model} -\title{Run a spatial interaction model} +\name{si_calculate} +\alias{si_calculate} +\title{Calculate flow using a pre-existing function} \usage{ -si_model(od, fun, var_p, ...) +si_calculate(od, fun, constraint_p, ...) } \arguments{ \item{od}{A data frame representing origin-destination data, e.g. as created by @@ -13,7 +13,7 @@ si_model(od, fun, var_p, ...) \item{fun}{A function that calculates the interaction (e.g. the number of trips) between each OD pair} -\item{var_p}{Use this argument to run a 'production constrained' SIM. +\item{constraint_p}{Use this argument to run a 'production constrained' SIM. Character string corresponding to column in the \code{od} dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs such that the total for each zone of origin cannot go above this value.} @@ -27,15 +27,15 @@ and user-specified function \examples{ od = si_to_od(si_zones, si_zones, max_dist = 4000) fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000) -od_dd = si_model(od, fun = fun_dd) +od_dd = si_calculate(od, fun = fun_dd) plot(od$distance_euclidean, od_dd$res) fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000) -od_output = si_model(od, fun = fun, beta = 0.3, O = "origin_all", +od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", d = "distance_euclidean") head(od_output) plot(od$distance_euclidean, od_output$res) -od_pconst = si_model(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", - d = "distance_euclidean", var_p = origin_all) +od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all", + d = "distance_euclidean", constraint_p = origin_all) plot(od_pconst$distance_euclidean, od_pconst$res) plot(od_pconst["res"], logz = TRUE) } diff --git a/man/si_predict.Rd b/man/si_predict.Rd new file mode 100644 index 0000000..97c5777 --- /dev/null +++ b/man/si_predict.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/si_model.R +\name{si_predict} +\alias{si_predict} +\title{Predict si model based on pre-trained model} +\usage{ +si_predict(od, model, constraint_p) +} +\arguments{ +\item{od}{A data frame representing origin-destination data, e.g. as created by +\code{\link[=si_to_od]{si_to_od()}}} + +\item{model}{A model object, e.g. from \code{\link[=lm]{lm()}} or \code{\link[=glm]{glm()}}} + +\item{constraint_p}{Use this argument to run a 'production constrained' SIM. +Character string corresponding to column in the \code{od} +dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs +such that the total for each zone of origin cannot go above this value.} +} +\description{ +Predict si model based on pre-trained model +} +\examples{ +od = si_to_od(si_zones, si_zones, max_dist = 4000) +m = lm(od$origin_all ~ od$origin_bicycle) +od_updated = si_predict(od, m) +} +\seealso{ +si_calculate +} diff --git a/vignettes/si.Rmd b/vignettes/si.Rmd index 66f58db..fbd8bc7 100644 --- a/vignettes/si.Rmd +++ b/vignettes/si.Rmd @@ -120,8 +120,8 @@ A simplistic SIM can be created just based on the distance between points: si_power = function(od, beta) { (od[["distance_euclidean"]] / 1000)^beta } -od_model = si_model(od_sim, fun = si_power, beta = -0.8) -plot(od_model["res"], logz = TRUE) +od_calculate = si_calculate(od_sim, fun = si_power, beta = -0.8) +plot(od_calculate["res"], logz = TRUE) ``` This approach, ignoring all variables at the level of trip origins and destinations, results in flow estimates with no units.