Laplace sampling (#16)

* Starting C++ implementation * Python bindings * Python tests * Python: Clean up * Julia bindings and tests * Julia: re-enable other tests * R bindings and tests * R: re-enable other tests * Fix python test
WardBrian · Mar 26, 2024 · 3f2c165 · 3f2c165
1 parent 9934a59
commit 3f2c165
Show file tree

Hide file tree

Showing 19 changed files with 827 additions and 23 deletions.
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,4 @@ make/local
 .Rhistory
 
 Manifest.toml
+.coverage
diff --git a/TODO.md b/TODO.md
@@ -6,7 +6,7 @@
     - [ ] Version checking
 - [ ] Fixed param sampler for 0 dimension parameters?
 - [ ] Add wrapper around generate quantities method?
-- [ ] Add wraper around laplace sampling?
+- [x] Add wraper around laplace sampling?
 - [x] Pathfinder: expose the no lp/no PSIS version
   - [x] Pathfinder: now change single-path behavior to run PSIS?
 - [x] Add ability to interrupt the algorithms during runs (Ctrl+C)
@@ -27,6 +27,7 @@
 
 
 ## Other
+- [ ] Documentation
 - [x] Set up visibility such that all non-API symbols are hidden
 - [x] Rename
 - [ ] Look into cmake/clang-cl builds
diff --git a/clients/R/R/tinystan.R b/clients/R/R/tinystan.R
@@ -11,6 +11,8 @@ OptimizationAlgorithm <- list(NEWTON = 0, BFGS = 1, LBFGS = 2)
 
 OPTIMIZATION_VARIABLES = c("lp__")
 
+LAPLACE_VARIABLES = c("log_p__", "log_g__")
+
 #' @export
 StanModel <- R6::R6Class("StanModel", public = list(initialize = function(lib, stanc_args = NULL,
     make_args = NULL, warn = TRUE) {
@@ -200,6 +202,56 @@ StanModel <- R6::R6Class("StanModel", public = list(initialize = function(lib, s
 
         output_as_rvars(params, 1, 1, vars$out)
     })
+}, laplace_sample = function(mode, data = "", num_draws = 1000, jacobian = TRUE,
+    calculate_lp = TRUE, save_hessian = FALSE, seed = NULL, refresh = 0, num_threads = -1) {
+
+    if (num_draws < 1) {
+        stop("num_draws must be at least 1")
+    }
+    if (is.null(seed)) {
+        seed <- as.integer(runif(1, min = 0, max = (2^31)))
+    }
+
+    private$with_model(data, seed, {
+        params <- c(LAPLACE_VARIABLES, private$get_parameter_names(model))
+        num_params <- length(params)
+        free_params <- private$get_free_params(model)
+
+        if (save_hessian) {
+            hessian_size <- free_params * free_params
+        } else {
+            hessian_size <- 1
+        }
+
+        if (is.numeric(mode)) {
+            if (length(mode) != num_params - length(LAPLACE_VARIABLES)) {
+                stop("Mode array has incorrect length.")
+            }
+            mode_array <- as.double(mode)
+            mode_json <- as.character("")
+            use_array <- TRUE
+        } else {
+            mode_array <- as.double(0)
+            mode_json <- as.character(mode)
+            use_array <- FALSE
+        }
+
+        vars <- .C("tinystan_laplace_sample_R", return_code = as.integer(0), as.raw(model),
+            as.logical(use_array), mode_array, mode_json, as.integer(seed), as.integer(num_draws),
+            as.logical(jacobian), as.logical(calculate_lp), as.integer(refresh),
+            as.integer(num_threads), out = double(num_params * num_draws), as.integer(num_params *
+                num_draws), as.logical(save_hessian), hessian = double(hessian_size),
+            err = raw(8), PACKAGE = private$lib_name)
+        handle_error(vars$return_code, private$lib_name, vars$err)
+
+        out <- output_as_rvars(params, num_draws, 1, vars$out)
+        if (save_hessian) {
+            hessian <- array(vars$hessian, dim = c(free_params, free_params))
+            return(list(draws = out, hessian = hessian))
+        }
+        out
+    })
+
 }), private = list(lib = NA, lib_name = NA, sep = NA, with_model = function(data,
     seed, block) {
     ffi_ret <- .C("tinystan_create_model_R", model = raw(8), as.character(data),

diff --git a/clients/R/tests/testthat/test_compile.R b/clients/R/tests/testthat/test_compile.R
@@ -1,5 +1,3 @@
-
-
 test_that("compilation works", {
     name <- "gaussian"
     file <- file.path(stan_folder, name, paste0(name, ".stan"))

diff --git a/clients/R/tests/testthat/test_laplace.R b/clients/R/tests/testthat/test_laplace.R
@@ -0,0 +1,108 @@
+BERNOULLI_MODE <- "{\"theta\": 0.25}"
+
+test_that("data arguments work", {
+
+    out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA)
+    expect_true(mean(out1$theta) > 0.22 && mean(out1$theta) < 0.28)
+
+    data_file <- file.path(stan_folder, "bernoulli", "bernoulli.data.json")
+    out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, data = data_file)
+    expect_true(mean(out2$theta) > 0.22 && mean(out2$theta) < 0.28)
+})
+
+test_that("output sizes are correct", {
+    out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 324)
+    expect_equal(posterior::ndraws(out1), 324)
+})
+
+test_that("calculate_lp works", {
+    out <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 500,
+        calculate_lp = TRUE)
+    expect_equal(sum(is.nan(posterior::draws_of(out$log_p__))), 0)
+
+    out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 500,
+        calculate_lp = FALSE)
+    expect_equal(sum(is.nan(posterior::draws_of(out2$log_p__))), 500)
+})
+
+
+test_that("jacobian arg works", {
+    for (jacobian in c(TRUE, FALSE)) {
+
+        out <- simple_jacobian_model$optimize(jacobian = jacobian, seed = 1234)
+        sigma <- posterior::extract_variable(out, "sigma")
+
+        draws <- simple_jacobian_model$laplace_sample(c(sigma), jacobian = jacobian,
+            seed = 1234)
+        sigma <- mean(posterior::extract_variable(draws, "sigma"))
+        if (jacobian) {
+            expect_equal(sigma, 3.3, tolerance = 0.2, ignore_attr = TRUE)
+        } else {
+            expect_equal(sigma, 3, tolerance = 0.2, ignore_attr = TRUE)
+        }
+
+    }
+})
+
+test_that("save_hessian works", {
+    data <- "{\"N\": 3}"
+    mode <- "{\"alpha\": [0.1,0.2,0.3]}"
+
+    out <- gaussian_model$laplace_sample(mode, data, save_hessian = TRUE)
+    expect_true("hessian" %in% names(out))
+    expect_equal(dim(out$hessian), c(3, 3))
+    expect_equal(out$hessian, matrix(c(-1, 0, 0, 0, -1, 0, 0, 0, -1), nrow = 3))
+})
+
+test_that("seed works", {
+
+    out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 123)
+    out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 123)
+
+    expect_equal(out1$theta, out2$theta)
+
+    out3 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 456)
+    expect_error(expect_equal(out1$theta, out3$theta))
+
+})
+
+
+test_that("bad data handled properly", {
+
+    data <- "{\"N\": -1}"
+    expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, data), "greater than or equal to 0")
+
+    data <- "{\"N\": 1, \"y\": [1,2]}"
+    expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, data), "mismatch in dimension")
+
+    expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, "{\"bad\"}"), "Error in JSON parsing")
+
+    expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, "not/real/path.json"),
+        "Could not open data file")
+
+})
+
+test_that("bad mode array handled properly", {
+    mode1 = c(2)
+    expect_error(bernoulli_model$laplace_sample(mode1, BERNOULLI_DATA), "Bounded variable is 2")
+
+    mode2 = c(0.5, 0.5)
+    expect_error(bernoulli_model$laplace_sample(mode2, BERNOULLI_DATA), "incorrect length")
+})
+
+test_that("bad mode json handled properly", {
+    mode <- "{\"theta\": 2}"
+    expect_error(bernoulli_model$laplace_sample(mode, BERNOULLI_DATA), "Bounded variable is 2")
+
+    mode <- "{\"theta\": [0.5, 0.5]}"
+    expect_error(bernoulli_model$laplace_sample(mode, BERNOULLI_DATA), "mismatch in number")
+
+    expect_error(bernoulli_model$laplace_sample("bad/path.json", BERNOULLI_DATA),
+        "Could not open data file")
+})
+
+
+test_that("bad num_draws raises errors", {
+    expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 0),
+        "at least 1")
+})
diff --git a/clients/R/tests/testthat/test_pathfinder.R b/clients/R/tests/testthat/test_pathfinder.R
@@ -36,13 +36,13 @@ test_that("output sizes are correct", {
 test_that("calculate_lp works", {
     out <- bernoulli_model$pathfinder(BERNOULLI_DATA, num_paths = 2, calculate_lp = FALSE)
 
-    expect_gt(sum(is.nan(out$lp__)), 0)
-    expect_lt(sum(is.nan(out$lp__)), 2000)
+    expect_gt(sum(is.nan(posterior::draws_of(out$lp__))), 0)
+    expect_lt(sum(is.nan(posterior::draws_of(out$lp__))), 2000)
 
     out_single <- bernoulli_model$pathfinder(BERNOULLI_DATA, num_paths = 1, calculate_lp = FALSE)
 
-    expect_gt(sum(is.nan(out_single$lp__)), 0)
-    expect_lt(sum(is.nan(out_single$lp__)), 1000)
+    expect_gt(sum(is.nan(posterior::draws_of(out_single$lp__))), 0)
+    expect_lt(sum(is.nan(posterior::draws_of(out_single$lp__))), 1000)
 })
 
 test_that("seed works", {

diff --git a/clients/julia/src/TinyStan.jl b/clients/julia/src/TinyStan.jl
@@ -6,6 +6,7 @@ export Model,
     sample,
     optimize,
     OptimizationAlgorithm,
+    laplace_sample,
     api_version,
     compile_model,
     set_tinystan_path!

diff --git a/clients/julia/src/model.jl b/clients/julia/src/model.jl
@@ -25,6 +25,8 @@ const PATHFINDER_VARIABLES = ["lp_approx__", "lp__"]
 
 const OPTIMIZE_VARIABLES = ["lp__"]
 
+const LAPLACE_VARIABLES = ["log_p__", "log_q__"]
+
 const exceptions = [ErrorException, ArgumentError, _ -> InterruptException()]
 
 mutable struct Model
@@ -529,3 +531,92 @@ function optimize(
         return (param_names, out)
     end
 end
+
+
+function laplace_sample(
+    model::Model,
+    mode::Union{String,Array{Float64}},
+    data::String = "";
+    num_draws::Int = 1000,
+    jacobian::Bool = true,
+    calculate_lp::Bool = true,
+    save_hessian::Bool = false,
+    seed::Union{UInt32,Nothing} = nothing,
+    refresh::Int = 0,
+    num_threads::Int = -1,
+)
+    if num_draws < 1
+        error("num_draws must be at least 1")
+    end
+    if seed === nothing
+        seed = rand(UInt32)
+    end
+
+    with_model(model, data, seed) do model_ptr
+        param_names = cat(LAPLACE_VARIABLES, get_names(model, model_ptr), dims = 1)
+        num_params = length(param_names)
+        out = zeros(Float64, num_params, num_draws)
+
+        if save_hessian
+            free_params = num_free_params(model, model_ptr)
+            hessian_out = zeros(Float64, free_params, free_params)
+        else
+            hessian_out = C_NULL
+        end
+
+        if mode isa String
+            mode_json = mode
+            mode_array = C_NULL
+        else
+            mode_json = C_NULL
+            mode_array = mode
+
+            if length(mode_array) != (num_params - length(LAPLACE_VARIABLES))
+                error(
+                    "Mode array has incorrect length. Expected $num_params" *
+                    " but got $(length(mode_array))",
+                )
+            end
+        end
+
+        err = Ref{Ptr{Cvoid}}()
+        return_code = ccall(
+            Libc.Libdl.dlsym(model.lib, :tinystan_laplace_sample),
+            Cint,
+            (
+                Ptr{Cvoid},
+                Ptr{Cdouble},
+                Cstring,
+                Cuint,
+                Cint,
+                Cint, # really bool
+                Cint, # really bool
+                Cint,
+                Cint,
+                Ref{Cdouble},
+                Csize_t,
+                Ptr{Cdouble},
+                Ref{Ptr{Cvoid}},
+            ),
+            model_ptr,
+            mode_array,
+            mode_json,
+            seed,
+            num_draws,
+            Int32(jacobian),
+            Int32(calculate_lp),
+            refresh,
+            num_threads,
+            out,
+            length(out),
+            hessian_out,
+            err,
+        )
+        raise_for_error(model.lib, return_code, err)
+
+        if save_hessian
+            return (param_names, transpose(out), hessian_out)
+        end
+        return (param_names, transpose(out))
+    end
+end
diff --git a/clients/julia/test/runtests.jl b/clients/julia/test/runtests.jl
@@ -22,4 +22,5 @@ simple_jacobian_model =
     include("test_sample.jl")
     include("test_pathfinder.jl")
     include("test_optimize.jl")
+    include("test_laplace.jl")
 end