From 3f2c1658138f998902edc0bf4a33a1ce7575b3b4 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Tue, 26 Mar 2024 14:43:15 -0400 Subject: [PATCH] Laplace sampling (#16) * Starting C++ implementation * Python bindings * Python tests * Python: Clean up * Julia bindings and tests * Julia: re-enable other tests * R bindings and tests * R: re-enable other tests * Fix python test --- .gitignore | 1 + TODO.md | 3 +- clients/R/R/tinystan.R | 52 +++++++ clients/R/tests/testthat/test_compile.R | 2 - clients/R/tests/testthat/test_laplace.R | 108 +++++++++++++ clients/R/tests/testthat/test_pathfinder.R | 8 +- clients/julia/src/TinyStan.jl | 1 + clients/julia/src/model.jl | 91 +++++++++++ clients/julia/test/runtests.jl | 1 + clients/julia/test/test_laplace.jl | 171 +++++++++++++++++++++ clients/python/tests/test_laplace.py | 147 ++++++++++++++++++ clients/python/tests/test_pathfinder.py | 1 - clients/python/tests/test_sample.py | 8 + clients/python/tinystan/model.py | 115 +++++++++++++- src/R_shims.cpp | 27 ++++ src/buffer.hpp | 15 +- src/model.hpp | 40 +++++ src/tinystan.cpp | 50 +++++- src/tinystan.h | 9 ++ 19 files changed, 827 insertions(+), 23 deletions(-) create mode 100644 clients/R/tests/testthat/test_laplace.R create mode 100644 clients/julia/test/test_laplace.jl create mode 100644 clients/python/tests/test_laplace.py diff --git a/.gitignore b/.gitignore index 3c39a7b..d831cb1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ make/local .Rhistory Manifest.toml +.coverage diff --git a/TODO.md b/TODO.md index 344a3b0..3faa7f2 100644 --- a/TODO.md +++ b/TODO.md @@ -6,7 +6,7 @@ - [ ] Version checking - [ ] Fixed param sampler for 0 dimension parameters? - [ ] Add wrapper around generate quantities method? -- [ ] Add wraper around laplace sampling? +- [x] Add wraper around laplace sampling? - [x] Pathfinder: expose the no lp/no PSIS version - [x] Pathfinder: now change single-path behavior to run PSIS? - [x] Add ability to interrupt the algorithms during runs (Ctrl+C) @@ -27,6 +27,7 @@ ## Other +- [ ] Documentation - [x] Set up visibility such that all non-API symbols are hidden - [x] Rename - [ ] Look into cmake/clang-cl builds diff --git a/clients/R/R/tinystan.R b/clients/R/R/tinystan.R index 2598127..a7f7af6 100644 --- a/clients/R/R/tinystan.R +++ b/clients/R/R/tinystan.R @@ -11,6 +11,8 @@ OptimizationAlgorithm <- list(NEWTON = 0, BFGS = 1, LBFGS = 2) OPTIMIZATION_VARIABLES = c("lp__") +LAPLACE_VARIABLES = c("log_p__", "log_g__") + #' @export StanModel <- R6::R6Class("StanModel", public = list(initialize = function(lib, stanc_args = NULL, make_args = NULL, warn = TRUE) { @@ -200,6 +202,56 @@ StanModel <- R6::R6Class("StanModel", public = list(initialize = function(lib, s output_as_rvars(params, 1, 1, vars$out) }) +}, laplace_sample = function(mode, data = "", num_draws = 1000, jacobian = TRUE, + calculate_lp = TRUE, save_hessian = FALSE, seed = NULL, refresh = 0, num_threads = -1) { + + if (num_draws < 1) { + stop("num_draws must be at least 1") + } + if (is.null(seed)) { + seed <- as.integer(runif(1, min = 0, max = (2^31))) + } + + private$with_model(data, seed, { + params <- c(LAPLACE_VARIABLES, private$get_parameter_names(model)) + num_params <- length(params) + free_params <- private$get_free_params(model) + + if (save_hessian) { + hessian_size <- free_params * free_params + } else { + hessian_size <- 1 + } + + if (is.numeric(mode)) { + if (length(mode) != num_params - length(LAPLACE_VARIABLES)) { + stop("Mode array has incorrect length.") + } + mode_array <- as.double(mode) + mode_json <- as.character("") + use_array <- TRUE + } else { + mode_array <- as.double(0) + mode_json <- as.character(mode) + use_array <- FALSE + } + + vars <- .C("tinystan_laplace_sample_R", return_code = as.integer(0), as.raw(model), + as.logical(use_array), mode_array, mode_json, as.integer(seed), as.integer(num_draws), + as.logical(jacobian), as.logical(calculate_lp), as.integer(refresh), + as.integer(num_threads), out = double(num_params * num_draws), as.integer(num_params * + num_draws), as.logical(save_hessian), hessian = double(hessian_size), + err = raw(8), PACKAGE = private$lib_name) + handle_error(vars$return_code, private$lib_name, vars$err) + + out <- output_as_rvars(params, num_draws, 1, vars$out) + if (save_hessian) { + hessian <- array(vars$hessian, dim = c(free_params, free_params)) + return(list(draws = out, hessian = hessian)) + } + out + }) + }), private = list(lib = NA, lib_name = NA, sep = NA, with_model = function(data, seed, block) { ffi_ret <- .C("tinystan_create_model_R", model = raw(8), as.character(data), diff --git a/clients/R/tests/testthat/test_compile.R b/clients/R/tests/testthat/test_compile.R index d5501aa..07e8633 100644 --- a/clients/R/tests/testthat/test_compile.R +++ b/clients/R/tests/testthat/test_compile.R @@ -1,5 +1,3 @@ - - test_that("compilation works", { name <- "gaussian" file <- file.path(stan_folder, name, paste0(name, ".stan")) diff --git a/clients/R/tests/testthat/test_laplace.R b/clients/R/tests/testthat/test_laplace.R new file mode 100644 index 0000000..7ab2b5d --- /dev/null +++ b/clients/R/tests/testthat/test_laplace.R @@ -0,0 +1,108 @@ +BERNOULLI_MODE <- "{\"theta\": 0.25}" + +test_that("data arguments work", { + + out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA) + expect_true(mean(out1$theta) > 0.22 && mean(out1$theta) < 0.28) + + data_file <- file.path(stan_folder, "bernoulli", "bernoulli.data.json") + out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, data = data_file) + expect_true(mean(out2$theta) > 0.22 && mean(out2$theta) < 0.28) +}) + +test_that("output sizes are correct", { + out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 324) + expect_equal(posterior::ndraws(out1), 324) +}) + +test_that("calculate_lp works", { + out <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 500, + calculate_lp = TRUE) + expect_equal(sum(is.nan(posterior::draws_of(out$log_p__))), 0) + + out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 500, + calculate_lp = FALSE) + expect_equal(sum(is.nan(posterior::draws_of(out2$log_p__))), 500) +}) + + +test_that("jacobian arg works", { + for (jacobian in c(TRUE, FALSE)) { + + out <- simple_jacobian_model$optimize(jacobian = jacobian, seed = 1234) + sigma <- posterior::extract_variable(out, "sigma") + + draws <- simple_jacobian_model$laplace_sample(c(sigma), jacobian = jacobian, + seed = 1234) + sigma <- mean(posterior::extract_variable(draws, "sigma")) + if (jacobian) { + expect_equal(sigma, 3.3, tolerance = 0.2, ignore_attr = TRUE) + } else { + expect_equal(sigma, 3, tolerance = 0.2, ignore_attr = TRUE) + } + + } +}) + +test_that("save_hessian works", { + data <- "{\"N\": 3}" + mode <- "{\"alpha\": [0.1,0.2,0.3]}" + + out <- gaussian_model$laplace_sample(mode, data, save_hessian = TRUE) + expect_true("hessian" %in% names(out)) + expect_equal(dim(out$hessian), c(3, 3)) + expect_equal(out$hessian, matrix(c(-1, 0, 0, 0, -1, 0, 0, 0, -1), nrow = 3)) +}) + +test_that("seed works", { + + out1 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 123) + out2 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 123) + + expect_equal(out1$theta, out2$theta) + + out3 <- bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, seed = 456) + expect_error(expect_equal(out1$theta, out3$theta)) + +}) + + +test_that("bad data handled properly", { + + data <- "{\"N\": -1}" + expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, data), "greater than or equal to 0") + + data <- "{\"N\": 1, \"y\": [1,2]}" + expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, data), "mismatch in dimension") + + expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, "{\"bad\"}"), "Error in JSON parsing") + + expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, "not/real/path.json"), + "Could not open data file") + +}) + +test_that("bad mode array handled properly", { + mode1 = c(2) + expect_error(bernoulli_model$laplace_sample(mode1, BERNOULLI_DATA), "Bounded variable is 2") + + mode2 = c(0.5, 0.5) + expect_error(bernoulli_model$laplace_sample(mode2, BERNOULLI_DATA), "incorrect length") +}) + +test_that("bad mode json handled properly", { + mode <- "{\"theta\": 2}" + expect_error(bernoulli_model$laplace_sample(mode, BERNOULLI_DATA), "Bounded variable is 2") + + mode <- "{\"theta\": [0.5, 0.5]}" + expect_error(bernoulli_model$laplace_sample(mode, BERNOULLI_DATA), "mismatch in number") + + expect_error(bernoulli_model$laplace_sample("bad/path.json", BERNOULLI_DATA), + "Could not open data file") +}) + + +test_that("bad num_draws raises errors", { + expect_error(bernoulli_model$laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws = 0), + "at least 1") +}) diff --git a/clients/R/tests/testthat/test_pathfinder.R b/clients/R/tests/testthat/test_pathfinder.R index 1aa6c90..0c392ad 100644 --- a/clients/R/tests/testthat/test_pathfinder.R +++ b/clients/R/tests/testthat/test_pathfinder.R @@ -36,13 +36,13 @@ test_that("output sizes are correct", { test_that("calculate_lp works", { out <- bernoulli_model$pathfinder(BERNOULLI_DATA, num_paths = 2, calculate_lp = FALSE) - expect_gt(sum(is.nan(out$lp__)), 0) - expect_lt(sum(is.nan(out$lp__)), 2000) + expect_gt(sum(is.nan(posterior::draws_of(out$lp__))), 0) + expect_lt(sum(is.nan(posterior::draws_of(out$lp__))), 2000) out_single <- bernoulli_model$pathfinder(BERNOULLI_DATA, num_paths = 1, calculate_lp = FALSE) - expect_gt(sum(is.nan(out_single$lp__)), 0) - expect_lt(sum(is.nan(out_single$lp__)), 1000) + expect_gt(sum(is.nan(posterior::draws_of(out_single$lp__))), 0) + expect_lt(sum(is.nan(posterior::draws_of(out_single$lp__))), 1000) }) test_that("seed works", { diff --git a/clients/julia/src/TinyStan.jl b/clients/julia/src/TinyStan.jl index fd727fb..793f76f 100644 --- a/clients/julia/src/TinyStan.jl +++ b/clients/julia/src/TinyStan.jl @@ -6,6 +6,7 @@ export Model, sample, optimize, OptimizationAlgorithm, + laplace_sample, api_version, compile_model, set_tinystan_path! diff --git a/clients/julia/src/model.jl b/clients/julia/src/model.jl index fcc68ec..db15654 100644 --- a/clients/julia/src/model.jl +++ b/clients/julia/src/model.jl @@ -25,6 +25,8 @@ const PATHFINDER_VARIABLES = ["lp_approx__", "lp__"] const OPTIMIZE_VARIABLES = ["lp__"] +const LAPLACE_VARIABLES = ["log_p__", "log_q__"] + const exceptions = [ErrorException, ArgumentError, _ -> InterruptException()] mutable struct Model @@ -529,3 +531,92 @@ function optimize( return (param_names, out) end end + + +function laplace_sample( + model::Model, + mode::Union{String,Array{Float64}}, + data::String = ""; + num_draws::Int = 1000, + jacobian::Bool = true, + calculate_lp::Bool = true, + save_hessian::Bool = false, + seed::Union{UInt32,Nothing} = nothing, + refresh::Int = 0, + num_threads::Int = -1, +) + if num_draws < 1 + error("num_draws must be at least 1") + end + if seed === nothing + seed = rand(UInt32) + end + + with_model(model, data, seed) do model_ptr + param_names = cat(LAPLACE_VARIABLES, get_names(model, model_ptr), dims = 1) + num_params = length(param_names) + out = zeros(Float64, num_params, num_draws) + + if save_hessian + free_params = num_free_params(model, model_ptr) + hessian_out = zeros(Float64, free_params, free_params) + else + hessian_out = C_NULL + end + + if mode isa String + mode_json = mode + mode_array = C_NULL + else + mode_json = C_NULL + mode_array = mode + + if length(mode_array) != (num_params - length(LAPLACE_VARIABLES)) + error( + "Mode array has incorrect length. Expected $num_params" * + " but got $(length(mode_array))", + ) + end + end + + err = Ref{Ptr{Cvoid}}() + return_code = ccall( + Libc.Libdl.dlsym(model.lib, :tinystan_laplace_sample), + Cint, + ( + Ptr{Cvoid}, + Ptr{Cdouble}, + Cstring, + Cuint, + Cint, + Cint, # really bool + Cint, # really bool + Cint, + Cint, + Ref{Cdouble}, + Csize_t, + Ptr{Cdouble}, + Ref{Ptr{Cvoid}}, + ), + model_ptr, + mode_array, + mode_json, + seed, + num_draws, + Int32(jacobian), + Int32(calculate_lp), + refresh, + num_threads, + out, + length(out), + hessian_out, + err, + ) + raise_for_error(model.lib, return_code, err) + + if save_hessian + return (param_names, transpose(out), hessian_out) + end + return (param_names, transpose(out)) + end +end diff --git a/clients/julia/test/runtests.jl b/clients/julia/test/runtests.jl index f3704b4..a3c1ac1 100644 --- a/clients/julia/test/runtests.jl +++ b/clients/julia/test/runtests.jl @@ -22,4 +22,5 @@ simple_jacobian_model = include("test_sample.jl") include("test_pathfinder.jl") include("test_optimize.jl") + include("test_laplace.jl") end diff --git a/clients/julia/test/test_laplace.jl b/clients/julia/test/test_laplace.jl new file mode 100644 index 0000000..9e2a0fd --- /dev/null +++ b/clients/julia/test/test_laplace.jl @@ -0,0 +1,171 @@ +@testset "Laplace Sampling" verbose = true begin + + BERNOULLI_MODE = "{\"theta\": 0.25}" + + @testset "Data" begin + (names, draws) = laplace_sample(bernoulli_model, BERNOULLI_MODE, BERNOULLI_DATA) + @test 0.22 < mean(draws[:, names.=="theta"]) < 0.28 + + (names, draws) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + joinpath(STAN_FOLDER, "bernoulli", "bernoulli.data.json"), + ) + @test 0.22 < mean(draws[:, names.=="theta"]) < 0.28 + end + + + @testset "Number of draws" begin + (names, draws) = + laplace_sample(bernoulli_model, BERNOULLI_MODE, BERNOULLI_DATA; num_draws = 234) + @test size(draws, 1) == 234 + end + + @testset "Calculate LP" begin + (names, draws) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + num_draws = 500, + calculate_lp = true, + ) + @test sum(isnan.(draws[:, names.=="log_p__"])) == 0 + + (names, draws) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + num_draws = 500, + calculate_lp = false, + ) + @test sum(isnan.(draws[:, names.=="log_p__"])) == 500 + end + + @testset "Jacobian" begin + @testset for jacobian in [true, false] + (names, mode) = + optimize(simple_jacobian_model; jacobian = jacobian, seed = UInt32(1234)) + mode_array = mode[2:end] + (names, draws) = laplace_sample( + simple_jacobian_model, + mode_array; + jacobian = jacobian, + seed = UInt32(1234), + ) + + optimum = if jacobian + 3.3 + else + 3.0 + end + @test optimum ≈ mean(draws[:, names.=="sigma"]) atol = 0.2 + end + end + + @testset "Save Hessian" begin + data = "{\"N\": 3}" + mode = "{\"alpha\": [0.1,0.2,0.3]}" + + (_, _, hessian) = laplace_sample(gaussian_model, mode, data; save_hessian = true) + @test size(hessian) == (3, 3) + @test hessian ≈ [-1.0 0.0 0.0; 0.0 -1.0 0.0; 0.0 0.0 -1.0] atol = 0.1 + end + + @testset "Seed" begin + (_, draws1) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + seed = UInt32(123), + ) + (_, draws2) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + seed = UInt32(123), + ) + @test draws1 == draws2 + + (_, draws3) = laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + seed = UInt32(456), + ) + @test draws1 != draws3 + end + + @testset "Bad data" begin + data1 = "{\"N\": -1}" + @test_throws "greater than or equal to 0" laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + data1, + ) + + data2 = "{\"N\":1, \"y\": [0,1]}" + @test_throws "mismatch in dimension" laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + data2, + ) + + @test_throws "Error in JSON parsing" laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + "{'bad'}", + ) + @test_throws "Could not open data file" laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + "path/not/here.json", + ) + end + + @testset "Bad mode array" begin + mode1 = [2.0] + @test_throws "Bounded variable is 2" laplace_sample( + bernoulli_model, + mode1, + BERNOULLI_DATA, + ) + + mode2 = [0.5, 0.5] + @test_throws "incorrect length" laplace_sample( + bernoulli_model, + mode2, + BERNOULLI_DATA, + ) + end + + @testset "Bad mode json" begin + mode1 = "{\"theta\": 2.0}" + @test_throws "Bounded variable is 2" laplace_sample( + bernoulli_model, + mode1, + BERNOULLI_DATA, + ) + + mode2 = "{\"theta\": [0.5, 0.5]}" + @test_throws "mismatch in number" laplace_sample( + bernoulli_model, + mode2, + BERNOULLI_DATA, + ) + + @test_throws "Could not open data file" laplace_sample( + bernoulli_model, + "bad/path.json", + BERNOULLI_DATA, + ) + end + + @testset "Bad number of draws" begin + @test_throws "at least 1" laplace_sample( + bernoulli_model, + BERNOULLI_MODE, + BERNOULLI_DATA; + num_draws = 0, + ) + end +end diff --git a/clients/python/tests/test_laplace.py b/clients/python/tests/test_laplace.py new file mode 100644 index 0000000..27cd818 --- /dev/null +++ b/clients/python/tests/test_laplace.py @@ -0,0 +1,147 @@ +import numpy as np +import pytest + +import tinystan +from tests import ( + BERNOULLI_DATA, + STAN_FOLDER, + bernoulli_model, + gaussian_model, + simple_jacobian_model, +) + +BERNOULLI_MODE = {"theta": 0.25} + + +def test_data(bernoulli_model): + bernoulli_mode_jac = bernoulli_model.optimize(BERNOULLI_DATA, jacobian=True) + # data is a string + out1 = bernoulli_model.laplace_sample(bernoulli_mode_jac, BERNOULLI_DATA) + assert 0.22 < out1["theta"].mean() < 0.28 + + # data stored in a file + data_file = STAN_FOLDER / "bernoulli" / "bernoulli.data.json" + out2 = bernoulli_model.laplace_sample(bernoulli_mode_jac, data_file) + assert 0.22 < out2["theta"].mean() < 0.28 + + +def test_num_draws(bernoulli_model): + out1 = bernoulli_model.laplace_sample(BERNOULLI_MODE, BERNOULLI_DATA, num_draws=223) + assert out1["theta"].shape == (223,) + + +def test_calculate_lp(bernoulli_model): + out1 = bernoulli_model.laplace_sample( + BERNOULLI_MODE, BERNOULLI_DATA, num_draws=500, calculate_lp=True + ) + assert np.sum(np.isnan(out1["log_p__"])) == 0 + + out2 = bernoulli_model.laplace_sample( + BERNOULLI_MODE, BERNOULLI_DATA, num_draws=500, calculate_lp=False + ) + assert np.sum(np.isnan(out2["log_p__"])) == 500 + + +@pytest.mark.parametrize("jacobian", [True, False], ids=["jacobian", "no_jacobian"]) +def test_jacobian(simple_jacobian_model, jacobian): + mode = simple_jacobian_model.optimize(jacobian=jacobian, seed=1234) + + out = simple_jacobian_model.laplace_sample(mode, jacobian=jacobian, seed=1234) + + if jacobian: + optimum = 3.3 + else: + optimum = 3 + assert np.isclose(out["sigma"].mean(), optimum, atol=0.2) + + +def test_save_hessian(gaussian_model): + data = {"N": 3} + mode = {"alpha": [0.1, 0.2, 0.3]} + + out = gaussian_model.laplace_sample(mode, data, save_hessian=True) + np.testing.assert_almost_equal( + out.hessian, np.array([[-1, 0, 0], [0, -1, 0], [0, 0, -1]]) + ) + + +def test_seed(bernoulli_model): + out1 = bernoulli_model.laplace_sample( + BERNOULLI_MODE, + BERNOULLI_DATA, + seed=123, + ) + out2 = bernoulli_model.laplace_sample( + BERNOULLI_MODE, + BERNOULLI_DATA, + seed=123, + ) + + np.testing.assert_equal(out1["theta"], out2["theta"]) + + out3 = bernoulli_model.laplace_sample( + BERNOULLI_MODE, + BERNOULLI_DATA, + seed=456, + ) + + with pytest.raises(AssertionError): + np.testing.assert_equal(out1["theta"], out3["theta"]) + + +def test_bad_data(bernoulli_model): + bernoulli_mode_jac = bernoulli_model.optimize(BERNOULLI_DATA, jacobian=True) + + data = {"N": -1} + with pytest.raises(RuntimeError, match="greater than or equal to 0"): + bernoulli_model.laplace_sample(bernoulli_mode_jac, data=(data)) + + data2 = {"N": 1, "y": [1, 2]} + with pytest.raises(RuntimeError, match="mismatch in dimension"): + bernoulli_model.laplace_sample(bernoulli_mode_jac, data=(data2)) + + with pytest.raises(RuntimeError, match="Error in JSON parsing"): + bernoulli_model.laplace_sample(bernoulli_mode_jac, data="{'bad'}") + + with pytest.raises(ValueError, match="Could not open data file"): + bernoulli_model.laplace_sample(bernoulli_mode_jac, data="path/not/here.json") + + +def test_bad_mode_stanoutput(bernoulli_model): + mode1 = bernoulli_model.sample(BERNOULLI_DATA) + with pytest.raises(ValueError, match="can only be used with"): + bernoulli_model.laplace_sample(mode1, BERNOULLI_DATA) + + +def test_bad_mode_array(bernoulli_model): + mode1 = np.array([2.0]) + with pytest.raises(RuntimeError, match="Bounded variable is 2"): + bernoulli_model.laplace_sample(mode1, BERNOULLI_DATA) + + mode2 = np.array([0.1, 0.1]) + with pytest.raises(ValueError, match="incorrect length"): + bernoulli_model.laplace_sample(mode2, BERNOULLI_DATA) + + +def test_bad_mode_json(bernoulli_model): + mode1 = {"theta": 2} # out of bounds + with pytest.raises(RuntimeError, match="Bounded variable is 2"): + bernoulli_model.laplace_sample(mode1, BERNOULLI_DATA) + + mode2 = {"theta": [0.1, 0.1]} + with pytest.raises(RuntimeError, match="mismatch in number"): + bernoulli_model.laplace_sample(mode2, BERNOULLI_DATA) + + with pytest.raises(ValueError, match="Could not open data file"): + bernoulli_model.laplace_sample("bad/path.json", BERNOULLI_DATA) + + +@pytest.mark.parametrize( + "arg, value, match", + [ + ("num_draws", 0, "at least 1"), + ], +) +def test_bad_argument(bernoulli_model, arg, value, match): + with pytest.raises(ValueError, match=match): + bernoulli_model.laplace_sample(BERNOULLI_DATA, **{arg: value}) diff --git a/clients/python/tests/test_pathfinder.py b/clients/python/tests/test_pathfinder.py index fe8ff84..479484d 100644 --- a/clients/python/tests/test_pathfinder.py +++ b/clients/python/tests/test_pathfinder.py @@ -91,7 +91,6 @@ def test_stan2368_bug(bernoulli_model): assert out6["theta"].shape == (1,) - def test_calculate_lp(bernoulli_model): out = bernoulli_model.pathfinder(BERNOULLI_DATA, num_paths=2, calculate_lp=False) assert np.sum(np.isnan(out["lp__"])) > 0 diff --git a/clients/python/tests/test_sample.py b/clients/python/tests/test_sample.py index 45152ca..8709203 100644 --- a/clients/python/tests/test_sample.py +++ b/clients/python/tests/test_sample.py @@ -196,6 +196,14 @@ def test_multiple_inits(multimodal_model, temp_json): assert np.all(out3["mu"][1] > 0) +def test_init_from_other(bernoulli_model): + out = bernoulli_model.optimize(BERNOULLI_DATA, jacobian=True) + out2 = bernoulli_model.sample( + BERNOULLI_DATA, num_warmup=100, num_samples=100, inits=out + ) + assert 0.2 < out2["theta"].mean() < 0.3 + + def test_bad_data(bernoulli_model): data = {"N": -1} with pytest.raises(RuntimeError, match="greater than or equal to 0"): diff --git a/clients/python/tinystan/model.py b/clients/python/tinystan/model.py index a8857e1..346102d 100644 --- a/clients/python/tinystan/model.py +++ b/clients/python/tinystan/model.py @@ -4,7 +4,7 @@ import warnings from enum import Enum from os import PathLike, fspath -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np from dllist import dllist @@ -66,6 +66,11 @@ def print_callback(msg, size, is_error): "lp__", ] +LAPLACE_VARIABLES = [ + "log_p__", + "log_q__", +] + FIXED_SAMPLER_VARIABLES = [ "lp__", "accept_stat__", @@ -102,6 +107,27 @@ def rand_u32(): return np.random.randint(0, 2**32 - 1, dtype=np.uint32) +def preprocess_laplace_inputs( + mode: Union[StanOutput, np.ndarray, Dict[str, Any], str, PathLike] +) -> Tuple[Optional[np.ndarray], Optional[str]]: + if isinstance(mode, StanOutput): + # handle case of passing optimization output directly + if len(mode.data.shape) == 1: + mode = mode.data[1:] + else: + raise ValueError("Laplace can only be used with Optimization output") + # mode = mode.create_inits(chains=1, seed=seed) + + if isinstance(mode, np.ndarray): + mode_json = None + mode_array = mode + else: + mode_json = encode_stan_json(mode) + mode_array = None + + return mode_array, mode_json + + class Model: def __init__( self, @@ -243,6 +269,24 @@ def __init__( err_ptr, ] + self._ffi_laplace = self._lib.tinystan_laplace_sample + self._ffi_laplace.restype = ctypes.c_int + self._ffi_laplace.argtypes = [ + ctypes.c_void_p, # model + nullable_double_array, # array of constrained params + ctypes.c_char_p, # json of constrained params + ctypes.c_uint, # seed + ctypes.c_int, # draws + ctypes.c_bool, # jacobian + ctypes.c_bool, # calculate_lp + ctypes.c_int, # refresh + ctypes.c_int, # num_threads + double_array, # draws buffer + ctypes.c_size_t, # buffer size + nullable_double_array, # hessian out + err_ptr, + ] + self._get_error_msg = self._lib.tinystan_get_error_message self._get_error_msg.restype = ctypes.c_char_p self._get_error_msg.argtypes = [ctypes.c_void_p] @@ -524,9 +568,6 @@ def optimize( ): seed = seed or rand_u32() - if isinstance(init, StanOutput): - init = init.create_inits(chains=1, seed=seed) - with self._get_model(data, seed) as model: param_names = OPTIMIZE_VARIABLES + self._get_parameter_names(model) @@ -536,7 +577,7 @@ def optimize( err = ctypes.pointer(ctypes.c_void_p()) rc = self._ffi_optimize( model, - encode_stan_json(init) if init is not None else None, + self._encode_inits(init, 1, seed), seed, id, init_radius, @@ -559,3 +600,67 @@ def optimize( self._raise_for_error(rc, err) return StanOutput(param_names, out) + + def laplace_sample( + self, + mode, + data="", + *, + num_draws=1000, + jacobian=True, + calculate_lp=True, + save_hessian=False, + seed=None, + refresh=0, + num_threads=-1, + ): + if num_draws < 1: + raise ValueError("num_draws must be at least 1") + + seed = seed or rand_u32() + + mode_array, mode_json = preprocess_laplace_inputs(mode) + + with self._get_model(data, seed) as model: + param_names = LAPLACE_VARIABLES + self._get_parameter_names(model) + num_params = len(param_names) + + if mode_array is not None and len(mode_array) != num_params - len( + LAPLACE_VARIABLES + ): + raise ValueError( + f"Mode array has incorrect length. Expected {num_params - len(LAPLACE_VARIABLES)}" + f" but got {len(mode_array)}" + ) + + out = np.zeros((num_draws, num_params), dtype=np.float64) + + model_params = self._num_free_params(model) + hessian_out = ( + np.zeros((model_params, model_params), dtype=np.float64) + if save_hessian + else None + ) + err = ctypes.pointer(ctypes.c_void_p()) + + rc = self._ffi_laplace( + model, + mode_array, + mode_json, + seed, + num_draws, + jacobian, + calculate_lp, + refresh, + num_threads, + out, + out.size, + hessian_out, + err, + ) + self._raise_for_error(rc, err) + + output = StanOutput(param_names, out) + if save_hessian: + output.hessian = hessian_out + return output diff --git a/src/R_shims.cpp b/src/R_shims.cpp index 987957f..2e31151 100644 --- a/src/R_shims.cpp +++ b/src/R_shims.cpp @@ -88,6 +88,33 @@ TINYSTAN_PUBLIC void tinystan_optimize_R( static_cast(*out_size), err); } +TINYSTAN_PUBLIC +void tinystan_laplace_sample_R(int* return_code, const TinyStanModel** model, + int* use_array, const double* theta_hat_constr, + const char** theta_hat_json, unsigned int* seed, + int* num_draws, int* jacobian, int* calculate_lp, + int* refresh, int* num_threads, double* out, + int* out_size, int* save_hessian, + double* hessian_out, TinyStanError** err) { + // difficult to directly pass a null pointer from R + double* hessian_out_ptr = nullptr; + if (*save_hessian) + hessian_out_ptr = hessian_out; + + const double* theta_hat_dbl_ptr = nullptr; + const char* theta_hat_json_ptr = nullptr; + if (*use_array) { + theta_hat_dbl_ptr = theta_hat_constr; + } else { + theta_hat_json_ptr = *theta_hat_json; + } + + *return_code = tinystan_laplace_sample( + *model, theta_hat_dbl_ptr, theta_hat_json_ptr, *seed, *num_draws, + (*jacobian != 0), (*calculate_lp != 0), *refresh, *num_threads, out, + static_cast(*out_size), hessian_out_ptr, err); +} + TINYSTAN_PUBLIC void tinystan_get_error_message_R(TinyStanError** err, char const** err_msg) { *err_msg = tinystan_get_error_message(*err); diff --git a/src/buffer.hpp b/src/buffer.hpp index 4c7542b..2481e9f 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -64,13 +64,13 @@ class buffer_writer : public stan::callbacks::writer { size_t size; }; -class metric_buffer_writer : public stan::callbacks::structured_writer { +class filtered_writer : public stan::callbacks::structured_writer { public: - metric_buffer_writer(double *buf) : buf(buf), pos(0){}; - virtual ~metric_buffer_writer(){}; + filtered_writer(std::string key, double *buf) : key(key), buf(buf), pos(0){}; + virtual ~filtered_writer(){}; - void write(const std::string &key, const Eigen::MatrixXd &mat) { - if (!pos && buf != nullptr && key == "inv_metric") { + void write(const std::string &key_in, const Eigen::MatrixXd &mat) { + if (!pos && buf != nullptr && key_in == key) { for (int j = 0; j < mat.cols(); ++j) { for (int i = 0; i < mat.rows(); ++i) { buf[pos++] = mat(i, j); @@ -78,8 +78,8 @@ class metric_buffer_writer : public stan::callbacks::structured_writer { } } } - void write(const std::string &key, const Eigen::VectorXd &vec) { - if (!pos && buf != nullptr && key == "inv_metric") { + void write(const std::string &key_in, const Eigen::VectorXd &vec) { + if (!pos && buf != nullptr && key_in == key) { for (int i = 0; i < vec.rows(); ++i) { buf[pos++] = vec(i); } @@ -89,6 +89,7 @@ class metric_buffer_writer : public stan::callbacks::structured_writer { using stan::callbacks::structured_writer::write; private: + std::string key; double *buf; size_t pos; }; diff --git a/src/model.hpp b/src/model.hpp index 2562e1f..080ed1a 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -47,4 +48,43 @@ class TinyStanModel { size_t num_free_params; }; +namespace tinystan { +namespace model { + +Eigen::VectorXd unconstrain_parameters(const TinyStanModel *tmodel, + const double *theta, + const char *theta_json, + stan::callbacks::logger &logger) { + Eigen::VectorXd theta_unc(tmodel->num_free_params); + auto &model = *tmodel->model; + + std::stringstream msg; + try { + if (theta_json != nullptr) { + auto json_theta_hat = io::load_data(theta_json); + model.transform_inits(*json_theta_hat, theta_unc, &msg); + + } else if (theta != nullptr) { + Eigen::VectorXd theta_hat_constr_vec + = Eigen::Map(theta, tmodel->num_params); + model.unconstrain_array(theta_hat_constr_vec, theta_unc, &msg); + } else { + throw std::runtime_error("No initial value provided"); + } + } catch (...) { + if (msg.str().length() > 0) { + logger.info(msg.str()); + } + throw; + } + if (msg.str().length() > 0) { + logger.info(msg.str()); + } + + return theta_unc; +} + +} // namespace model +} // namespace tinystan + #endif diff --git a/src/tinystan.cpp b/src/tinystan.cpp index c1d638b..f50965b 100644 --- a/src/tinystan.cpp +++ b/src/tinystan.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ int tinystan_sample(const TinyStanModel *tmodel, size_t num_chains, sample_writers.emplace_back(out + draws_offset * i, draws_offset); } - std::vector metric_writers; + std::vector metric_writers; metric_writers.reserve(num_chains); int num_model_params = tmodel->num_free_params; int metric_offset = metric_choice == dense @@ -110,9 +111,10 @@ int tinystan_sample(const TinyStanModel *tmodel, size_t num_chains, : num_model_params; for (size_t i = 0; i < num_chains; ++i) { if (metric_out != nullptr) - metric_writers.emplace_back(metric_out + metric_offset * i); + metric_writers.emplace_back("inv_metric", + metric_out + metric_offset * i); else - metric_writers.emplace_back(nullptr); + metric_writers.emplace_back("inv_metric", nullptr); } auto initial_metrics = io::make_metric_inits( @@ -366,6 +368,48 @@ int tinystan_optimize(const TinyStanModel *tmodel, const char *init, return -1; } +int tinystan_laplace_sample(const TinyStanModel *tmodel, + const double *theta_hat_constr, + const char *theta_hat_json, unsigned int seed, + int num_draws, bool jacobian, bool calculate_lp, + int refresh, int num_threads, double *out, + size_t out_size, double *hessian_out, + TinyStanError **err) { + TINYSTAN_TRY_CATCH({ + error::check_positive("num_draws", num_draws); + + util::init_threading(num_threads); + + auto &model = *tmodel->model; + io::buffer_writer sample_writer(out, out_size); + io::filtered_writer hessian_writer("Hessian", hessian_out); + error::error_logger logger(refresh != 0); + interrupt::tinystan_interrupt_handler interrupt; + + Eigen::VectorXd theta_hat = model::unconstrain_parameters( + tmodel, theta_hat_constr, theta_hat_json, logger); + + int return_code; + if (jacobian) { + return_code = stan::services::laplace_sample( + model, theta_hat, num_draws, calculate_lp, seed, refresh, interrupt, + logger, sample_writer, hessian_writer); + } else { + return_code = stan::services::laplace_sample( + model, theta_hat, num_draws, calculate_lp, seed, refresh, interrupt, + logger, sample_writer, hessian_writer); + } + + if (return_code != 0) { + if (err != nullptr) { + *err = logger.get_error(); + } + } + return return_code; + }) + return -1; +} + const char *tinystan_get_error_message(const TinyStanError *err) { if (err == nullptr) { return "Something went wrong: No error found"; diff --git a/src/tinystan.h b/src/tinystan.h index 08ea27f..75832c5 100644 --- a/src/tinystan.h +++ b/src/tinystan.h @@ -132,6 +132,15 @@ TINYSTAN_PUBLIC int tinystan_optimize( int refresh, int num_threads, double *out, size_t out_size, TinyStanError **err); +TINYSTAN_PUBLIC +int tinystan_laplace_sample(const TinyStanModel *tmodel, + const double *theta_hat_constr, + const char *theta_hat_json, unsigned int seed, + int num_draws, bool jacobian, bool calculate_lp, + int refresh, int num_threads, double *out, + size_t out_size, double *hessian_out, + TinyStanError **err); + /** * Get the error message from an error object. *