From 01a92c2c5e8b24f8656ecdbe82de24278b61bc51 Mon Sep 17 00:00:00 2001
From: Robin Lovelace <rob00x@gmail.com>
Date: Mon, 18 Apr 2022 14:52:57 +0100
Subject: [PATCH] Create si_predict/si_calculate for #5

---
 NAMESPACE                            |  3 +-
 R/si_model.R                         | 39 ++++++++---
 README.md                            |  4 +-
 README.qmd                           |  4 +-
 data-raw/si_model.R                  | 97 ++++++++++++++++++++++++++++
 man/{si_model.Rd => si_calculate.Rd} | 18 +++---
 man/si_predict.Rd                    | 30 +++++++++
 vignettes/si.Rmd                     |  4 +-
 8 files changed, 174 insertions(+), 25 deletions(-)
 create mode 100644 data-raw/si_model.R
 rename man/{si_model.Rd => si_calculate.Rd} (71%)
 create mode 100644 man/si_predict.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4150e82..f55edca 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
-export(si_model)
+export(si_calculate)
+export(si_predict)
 export(si_to_od)
 importFrom(rlang,.data)
diff --git a/R/si_model.R b/R/si_model.R
index 92afa49..8ce6e36 100644
--- a/R/si_model.R
+++ b/R/si_model.R
@@ -1,4 +1,4 @@
-#' Run a spatial interaction model
+#' Calculate flow using a pre-existing function
 #'
 #' Executes a spatial interaction model based on an OD data frame 
 #' and user-specified function
@@ -7,7 +7,7 @@
 #'   [si_to_od()]
 #' @param fun A function that calculates the interaction (e.g. the number of trips)
 #'   between each OD pair
-#' @param var_p Use this argument to run a 'production constrained' SIM.
+#' @param constraint_p Use this argument to run a 'production constrained' SIM.
 #'   Character string corresponding to column in the `od`
 #'   dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs
 #'   such that the total for each zone of origin cannot go above this value.
@@ -17,22 +17,43 @@
 #' @examples
 #' od = si_to_od(si_zones, si_zones, max_dist = 4000)
 #' fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000)
-#' od_dd = si_model(od, fun = fun_dd)
+#' od_dd = si_calculate(od, fun = fun_dd)
 #' plot(od$distance_euclidean, od_dd$res)
 #' fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000)
-#' od_output = si_model(od, fun = fun, beta = 0.3, O = "origin_all", 
+#' od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", 
 #'   n = "destination_all", d = "distance_euclidean")
 #' head(od_output)
 #' plot(od$distance_euclidean, od_output$res)
-#' od_pconst = si_model(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
-#'   d = "distance_euclidean", var_p = origin_all)
+#' od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
+#'   d = "distance_euclidean", constraint_p = origin_all)
 #' plot(od_pconst$distance_euclidean, od_pconst$res)
 #' plot(od_pconst["res"], logz = TRUE)
-si_model = function(od, fun, var_p, ...) {
+si_calculate = function(od, fun, constraint_p, ...) {
   od$res = fun(od, ...)
-  if (!missing(var_p)) {
+  if (!missing(constraint_p)) {
     od_grouped = dplyr::group_by(od, .data$O)
-    od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} ))
+    od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{constraint_p}} ))
+    od = dplyr::ungroup(od_grouped)
+  }
+  od
+}
+#' Predict si model based on pre-trained model
+#' 
+#' @param model A model object, e.g. from [lm()] or [glm()]
+#' @inheritParams si_calculate
+#' @seealso si_calculate
+#' @export 
+#' @examples
+#' od = si_to_od(si_zones, si_zones, max_dist = 4000)
+#' m = lm(od$origin_all ~ od$origin_bicycle)
+#' od_updated = si_predict(od, m)
+si_predict = function(od, model, constraint_p) {
+  od$res = stats::predict(model, od)
+  if (!missing(constraint_p)) {
+    od_grouped = dplyr::group_by(od, .data$O)
+    od_grouped = dplyr::mutate(
+      od_grouped, res = .data$res / sum(.data$res) * mean( {{constraint_p}} )
+      )
     od = dplyr::ungroup(od_grouped)
   }
   od
diff --git a/README.md b/README.md
index b367c3b..0d59c9c 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ gravity_model = function(od, beta) {
     exp(-beta * od[["distance_euclidean"]] / 1000)
 } 
 # perform SIM
-od_res = si_model(od, fun = gravity_model, beta = 0.3, var_p = origin_all)
+od_res = si_calculate(od, fun = gravity_model, beta = 0.3, var_p = origin_all)
 # visualize the results
 plot(od_res$distance_euclidean, od_res$res)
 ```
@@ -62,7 +62,7 @@ approaches to be implemented. This flexibility is a key aspect of the
 package, enabling small and easily modified functions to be implemented
 and tested.
 
-The output `si_model()` is a geographic object which can be plotted as a
+The output `si_calculate)` is a geographic object which can be plotted as a
 map:
 
 ``` r
diff --git a/README.qmd b/README.qmd
index 93c968e..83fb780 100644
--- a/README.qmd
+++ b/README.qmd
@@ -50,7 +50,7 @@ gravity_model = function(od, beta) {
     exp(-beta * od[["distance_euclidean"]] / 1000)
 } 
 # perform SIM
-od_res = si_model(od, fun = gravity_model, beta = 0.3, var_p = origin_all)
+od_res = si_calculate(od, fun = gravity_model, beta = 0.3, var_p = origin_all)
 # visualize the results
 plot(od_res$distance_euclidean, od_res$res)
 ```
@@ -60,7 +60,7 @@ As the example above shows, the package allows/encourages you to define and use
 The resulting estimates of interaction, returned in the column `res` and plotted with distance in the graphic above, resulted from our choice of spatial interaction model inputs, allowing a wide range of alternative approaches to be implemented.
 This flexibility is a key aspect of the package, enabling small and easily modified functions to be implemented and tested.
 
-The output `si_model()` is a geographic object which can be plotted as a map:
+The output `si_calculate)` is a geographic object which can be plotted as a map:
 
 ```{r map}
 plot(od_res["res"], logz = TRUE)
diff --git a/data-raw/si_model.R b/data-raw/si_model.R
new file mode 100644
index 0000000..4f7ef5d
--- /dev/null
+++ b/data-raw/si_model.R
@@ -0,0 +1,97 @@
+# Aim: test different configurations for si_model/predict
+devtools::load_all()
+remotes::install_cran()
+
+# What we have currently:
+od = si_to_od(si_zones, si_zones, max_dist = 4000)
+fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000)
+od_dd = si_calculate(od, fun = fun_dd)
+plot(od$distance_euclidean, od_dd$res)
+fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000)
+od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", 
+  n = "destination_all", d = "distance_euclidean")
+head(od_output)
+plot(od$distance_euclidean, od_output$res)
+od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
+  d = "distance_euclidean", var_p = origin_all)
+plot(od_pconst$distance_euclidean, od_pconst$res)
+plot(od_pconst["res"], logz = TRUE)
+si_model = function(od, fun, var_p, ...) {
+  od$res = fun(od, ...)
+  if (!missing(var_p)) {
+    od_grouped = dplyr::group_by(od, .data$O)
+    od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} ))
+    od = dplyr::ungroup(od_grouped)
+  }
+  od
+}
+
+# Option 1
+od = si_to_od(si_zones, si_zones, max_dist = 4000)
+fun_dd = function(d, beta = 0.3) exp(-beta * d / 1000)
+od_dd = si_calculate(od, fun = fun_dd, d = distance_euclidean)
+plot(od$distance_euclidean, od_dd$res)
+fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000)
+od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", 
+  n = "destination_all", d = "distance_euclidean")
+head(od_output)
+plot(od$distance_euclidean, od_output$res)
+od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
+  d = "distance_euclidean", var_p = origin_all)
+plot(od_pconst$distance_euclidean, od_pconst$res)
+plot(od_pconst["res"], logz = TRUE)
+si_model = function(od, fun, var_p, ...) {
+  browser()
+  res = fun(...)
+  od = dplyr::mutate(od, res = fun(...))
+  od$res = fun(od, ...)
+  if (!missing(var_p)) {
+    od_grouped = dplyr::group_by(od, .data$O)
+    od_grouped = dplyr::mutate(od_grouped, res = .data$res / sum(.data$res) * mean( {{var_p}} ))
+    od = dplyr::ungroup(od_grouped)
+  }
+  od
+}
+
+
+si_dd_exp = function(d, beta) {
+  exp(d * -beta)
+}
+
+si_dd_exp(d = od$distance_euclidean, beta = 0.2)
+dplyr::mutate(od,
+ res = si_dd_exp(distance_euclidean, beta = 0.2)
+ )
+
+si_dd_exp = function(d, beta = 0.1) {
+  exp({{d}} * -beta)
+}
+
+dplyr::mutate(od,
+   res = si_dd_exp(distance_euclidean, beta = 0.2)
+ )
+
+si_model_tidy = function(od, fun, ...) {
+  dplyr::mutate(res = fun(dplyr::vars(...)))
+}
+
+si_model_tidy(od, si_dd_exp, beta = 0.1, d = distance_euclidean)
+
+
+f = flow ~ exp(distance_euclidean * -b)
+f = as.formula(y ~ exp(distance_euclidean * -b))
+with(od, f)
+si_model_standard = function(od, fun, formula, output = "flow") {
+  res = with(od, fun(...))
+  res
+}
+si_model_standard(od)
+
+si_dd_exp = function(d, beta = 0.1) {
+  exp(d * -beta)
+}
+
+
+si_model_standard(od, si_dd_exp, beta = 0.2, d = "distance_euclidean")
+
+
diff --git a/man/si_model.Rd b/man/si_calculate.Rd
similarity index 71%
rename from man/si_model.Rd
rename to man/si_calculate.Rd
index 7ed37cf..7692af3 100644
--- a/man/si_model.Rd
+++ b/man/si_calculate.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/si_model.R
-\name{si_model}
-\alias{si_model}
-\title{Run a spatial interaction model}
+\name{si_calculate}
+\alias{si_calculate}
+\title{Calculate flow using a pre-existing function}
 \usage{
-si_model(od, fun, var_p, ...)
+si_calculate(od, fun, constraint_p, ...)
 }
 \arguments{
 \item{od}{A data frame representing origin-destination data, e.g. as created by
@@ -13,7 +13,7 @@ si_model(od, fun, var_p, ...)
 \item{fun}{A function that calculates the interaction (e.g. the number of trips)
 between each OD pair}
 
-\item{var_p}{Use this argument to run a 'production constrained' SIM.
+\item{constraint_p}{Use this argument to run a 'production constrained' SIM.
 Character string corresponding to column in the \code{od}
 dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs
 such that the total for each zone of origin cannot go above this value.}
@@ -27,15 +27,15 @@ and user-specified function
 \examples{
 od = si_to_od(si_zones, si_zones, max_dist = 4000)
 fun_dd = function(od, d = "distance_euclidean", beta = 0.3) exp(-beta * od[[d]] / 1000)
-od_dd = si_model(od, fun = fun_dd)
+od_dd = si_calculate(od, fun = fun_dd)
 plot(od$distance_euclidean, od_dd$res)
 fun = function(od, O, n, d, beta) od[[O]] * od[[n]] * exp(-beta * od[[d]]/1000)
-od_output = si_model(od, fun = fun, beta = 0.3, O = "origin_all", 
+od_output = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", 
   n = "destination_all", d = "distance_euclidean")
 head(od_output)
 plot(od$distance_euclidean, od_output$res)
-od_pconst = si_model(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
-  d = "distance_euclidean", var_p = origin_all)
+od_pconst = si_calculate(od, fun = fun, beta = 0.3, O = "origin_all", n = "destination_all",
+  d = "distance_euclidean", constraint_p = origin_all)
 plot(od_pconst$distance_euclidean, od_pconst$res)
 plot(od_pconst["res"], logz = TRUE)
 }
diff --git a/man/si_predict.Rd b/man/si_predict.Rd
new file mode 100644
index 0000000..97c5777
--- /dev/null
+++ b/man/si_predict.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/si_model.R
+\name{si_predict}
+\alias{si_predict}
+\title{Predict si model based on pre-trained model}
+\usage{
+si_predict(od, model, constraint_p)
+}
+\arguments{
+\item{od}{A data frame representing origin-destination data, e.g. as created by
+\code{\link[=si_to_od]{si_to_od()}}}
+
+\item{model}{A model object, e.g. from \code{\link[=lm]{lm()}} or \code{\link[=glm]{glm()}}}
+
+\item{constraint_p}{Use this argument to run a 'production constrained' SIM.
+Character string corresponding to column in the \code{od}
+dataset that constrains the total 'interaction' (e.g. n. trips) for all OD pairs
+such that the total for each zone of origin cannot go above this value.}
+}
+\description{
+Predict si model based on pre-trained model
+}
+\examples{
+od = si_to_od(si_zones, si_zones, max_dist = 4000)
+m = lm(od$origin_all ~ od$origin_bicycle)
+od_updated = si_predict(od, m)
+}
+\seealso{
+si_calculate
+}
diff --git a/vignettes/si.Rmd b/vignettes/si.Rmd
index 66f58db..fbd8bc7 100644
--- a/vignettes/si.Rmd
+++ b/vignettes/si.Rmd
@@ -120,8 +120,8 @@ A simplistic SIM can be created just based on the distance between points:
 si_power = function(od, beta) {
   (od[["distance_euclidean"]] / 1000)^beta
 } 
-od_model = si_model(od_sim, fun = si_power, beta = -0.8)
-plot(od_model["res"], logz = TRUE)
+od_calculate = si_calculate(od_sim, fun = si_power, beta = -0.8)
+plot(od_calculate["res"], logz = TRUE)
 ```
 
 This approach, ignoring all variables at the level of trip origins and destinations, results in flow estimates with no units.