From 340cb041f077b9ebb208876596d56085087b0895 Mon Sep 17 00:00:00 2001 From: Brian Ward Date: Thu, 29 Aug 2024 14:20:16 -0400 Subject: [PATCH] Add input size checks (#241) * Add input checks in Python * Add input checks in Julia * Add input checks in R --- R/R/bridgestan.R | 31 ++++++++++++++++--- R/tests/testthat/test_model.R | 2 ++ julia/src/model.jl | 57 ++++++++++++++++++++++++----------- julia/test/model_tests.jl | 14 +++++++++ python/bridgestan/model.py | 18 +++++++---- python/test/test_stanmodel.py | 57 +++++++++++++++++++++++------------ 6 files changed, 131 insertions(+), 48 deletions(-) diff --git a/R/R/bridgestan.R b/R/R/bridgestan.R index 57fb1079..ceb0bc93 100755 --- a/R/R/bridgestan.R +++ b/R/R/bridgestan.R @@ -59,6 +59,9 @@ StanModel <- R6::R6Class("StanModel", } private$model <- ret$ptr_out + # pre-compute to avoid repeated work in bounds checks + private$unc_dims <- self$param_unc_num() + model_version <- self$model_version() if (packageVersion("bridgestan") != paste(model_version$major, model_version$minor, model_version$patch, sep = ".")) { warning(paste0("The version of the compiled model does not match the version of the R library. ", @@ -167,6 +170,9 @@ StanModel <- R6::R6Class("StanModel", } else { rng_ptr <- as.raw(rng$ptr) } + if (length(theta_unc) != private$unc_dims) { + stop("Incorrect number of unconstrained parameters.") + } vars <- .C("bs_param_constrain_R", as.raw(private$model), as.logical(include_tp), as.logical(include_gq), as.double(theta_unc), theta = double(self$param_num(include_tp = include_tp, include_gq = include_gq)), @@ -202,7 +208,7 @@ StanModel <- R6::R6Class("StanModel", param_unconstrain = function(theta) { vars <- .C("bs_param_unconstrain_R", as.raw(private$model), as.double(theta), - theta_unc = double(self$param_unc_num()), + theta_unc = double(private$unc_dims), return_code = as.integer(0), err_msg = as.character(""), err_ptr = raw(8), @@ -223,7 +229,7 @@ StanModel <- R6::R6Class("StanModel", param_unconstrain_json = function(json) { vars <- .C("bs_param_unconstrain_json_R", as.raw(private$model), as.character(json), - theta_unc = double(self$param_unc_num()), + theta_unc = double(private$unc_dims), return_code = as.integer(0), err_msg = as.character(""), err_ptr = raw(8), @@ -241,6 +247,9 @@ StanModel <- R6::R6Class("StanModel", #' @param jacobian If `TRUE`, include change of variables terms for constrained parameters. #' @return The log density. log_density = function(theta_unc, propto = TRUE, jacobian = TRUE) { + if (length(theta_unc) != private$unc_dims) { + stop("Incorrect number of unconstrained parameters.") + } vars <- .C("bs_log_density_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.double(theta_unc), val = double(1), @@ -262,7 +271,10 @@ StanModel <- R6::R6Class("StanModel", #' @param jacobian If `TRUE`, include change of variables terms for constrained parameters. #' @return List containing entries `val` (the log density) and `gradient` (the gradient). log_density_gradient = function(theta_unc, propto = TRUE, jacobian = TRUE) { - dims <- self$param_unc_num() + if (length(theta_unc) != private$unc_dims) { + stop("Incorrect number of unconstrained parameters.") + } + dims <- private$unc_dims vars <- .C("bs_log_density_gradient_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.double(theta_unc), val = double(1), gradient = double(dims), @@ -284,7 +296,10 @@ StanModel <- R6::R6Class("StanModel", #' @param jacobian If `TRUE`, include change of variables terms for constrained parameters. #' @return List containing entries `val` (the log density), `gradient` (the gradient), and `hessian` (the Hessian). log_density_hessian = function(theta_unc, propto = TRUE, jacobian = TRUE) { - dims <- self$param_unc_num() + if (length(theta_unc) != private$unc_dims) { + stop("Incorrect number of unconstrained parameters.") + } + dims <- private$unc_dims vars <- .C("bs_log_density_hessian_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.double(theta_unc), val = double(1), gradient = double(dims), hess = double(dims * dims), @@ -308,7 +323,12 @@ StanModel <- R6::R6Class("StanModel", #' @param jacobian If `TRUE`, include change of variables terms for constrained parameters. #' @return List containing entries `val` (the log density) and `Hvp` (the hessian-vector product). log_density_hessian_vector_product = function(theta_unc, v, propto = TRUE, jacobian = TRUE){ - dims <- self$param_unc_num() + dims <- private$unc_dims + if (length(theta_unc) != dims) { + stop("Incorrect number of unconstrained parameters.") + } else if (length(v) != dims) { + stop("Incorrect number of vector elements.") + } vars <- .C("bs_log_density_hessian_vector_product_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.double(theta_unc), @@ -331,6 +351,7 @@ StanModel <- R6::R6Class("StanModel", lib_name = NA, model = NA, seed = NA, + unc_dims = NA, finalize = function() { .C("bs_model_destruct_R", as.raw(private$model), diff --git a/R/tests/testthat/test_model.R b/R/tests/testthat/test_model.R index a987e65c..7b29f9ef 100644 --- a/R/tests/testthat/test_model.R +++ b/R/tests/testthat/test_model.R @@ -114,6 +114,8 @@ test_that("param_constrain handles rng arguments", { # require at least one present expect_error(full$param_constrain(c(1.2), include_gq = TRUE), "rng must be provided") + + expect_error(full$param_constrain(c(1.2, 1.2)), "Incorrect number of unconstrained parameters") }) diff --git a/julia/src/model.jl b/julia/src/model.jl index 49dbe786..0fbee5cb 100644 --- a/julia/src/model.jl +++ b/julia/src/model.jl @@ -36,6 +36,7 @@ mutable struct StanModel stanmodel::Ptr{StanModelStruct} @const data::String @const seed::UInt32 + @const param_unc_num::Int function StanModel( lib::String, @@ -85,7 +86,11 @@ mutable struct StanModel error(handle_error(lib, err, "bs_model_construct")) end - sm = new(lib, stanmodel, data, seed) + # compute now to avoid re-computing in bounds checks later + param_unc_num = + @ccall $(dlsym(lib, :bs_param_unc_num))(stanmodel::Ptr{StanModelStruct})::Cint + + sm = new(lib, stanmodel, data, seed, param_unc_num) function f(sm) @ccall $(dlsym(sm.lib, :bs_model_destruct))( @@ -279,7 +284,13 @@ function param_constrain!( rng::Union{StanRNG,Nothing} = nothing, ) dims = param_num(sm; include_tp = include_tp, include_gq = include_gq) - if length(out) != dims + if length(theta_unc) != sm.param_unc_num + throw( + DimensionMismatch( + "theta_unc must be same size as number of unconstrained parameters", + ), + ) + elseif length(out) != dims throw( DimensionMismatch("out must be same size as number of constrained parameters"), ) @@ -359,8 +370,7 @@ The result is stored in the vector `out`, and a reference is returned. See This is the inverse of [`param_constrain!`](@ref). """ function param_unconstrain!(sm::StanModel, theta::Vector{Float64}, out::Vector{Float64}) - dims = param_unc_num(sm) - if length(out) != dims + if length(out) != sm.param_unc_num throw( DimensionMismatch( "out must be same size as number of unconstrained parameters", @@ -396,7 +406,7 @@ re-using existing memory. This is the inverse of [`param_constrain`](@ref). """ function param_unconstrain(sm::StanModel, theta::Vector{Float64}) - out = zeros(param_unc_num(sm)) + out = zeros(sm.param_unc_num) param_unconstrain!(sm, theta, out) end @@ -411,8 +421,7 @@ The result is stored in the vector `out`, and a reference is returned. See [`param_unconstrain_json`](@ref) for a version which allocates fresh memory. """ function param_unconstrain_json!(sm::StanModel, theta::String, out::Vector{Float64}) - dims = param_unc_num(sm) - if length(out) != dims + if length(out) != sm.param_unc_num throw( DimensionMismatch( "out must be same size as number of unconstrained parameters", @@ -445,7 +454,7 @@ See [`param_unconstrain_json!`](@ref) for a version which allows re-using existing memory. """ function param_unconstrain_json(sm::StanModel, theta::String) - out = zeros(param_unc_num(sm)) + out = zeros(sm.param_unc_num) param_unconstrain_json!(sm, theta, out) end @@ -498,8 +507,11 @@ function log_density_gradient!( propto::Bool = true, jacobian::Bool = true, ) - dims = param_unc_num(sm) - if length(out) != dims + if length(q) != sm.param_unc_num + throw( + DimensionMismatch("q must be same size as number of unconstrained parameters"), + ) + elseif length(out) != sm.param_unc_num throw( DimensionMismatch( "out must be same size as number of unconstrained parameters", @@ -541,7 +553,7 @@ function log_density_gradient( propto::Bool = true, jacobian::Bool = true, ) - grad = zeros(param_unc_num(sm)) + grad = zeros(sm.param_unc_num) log_density_gradient!(sm, q, grad; propto = propto, jacobian = jacobian) end @@ -565,8 +577,12 @@ function log_density_hessian!( propto::Bool = true, jacobian::Bool = true, ) - dims = param_unc_num(sm) - if length(out_grad) != dims + dims = sm.param_unc_num + if length(q) != dims + throw( + DimensionMismatch("q must be same size as number of unconstrained parameters"), + ) + elseif length(out_grad) != dims throw( DimensionMismatch( "out_grad must be same size as number of unconstrained parameters", @@ -615,7 +631,7 @@ function log_density_hessian( propto::Bool = true, jacobian::Bool = true, ) - dims = param_unc_num(sm) + dims = sm.param_unc_num grad = zeros(dims) hess = zeros(dims * dims) log_density_hessian!(sm, q, grad, hess; propto = propto, jacobian = jacobian) @@ -641,8 +657,15 @@ function log_density_hessian_vector_product!( propto::Bool = true, jacobian::Bool = true, ) - dims = param_unc_num(sm) - if length(out) != dims + if length(q) != sm.param_unc_num + throw( + DimensionMismatch("q must be same size as number of unconstrained parameters"), + ) + elseif length(v) != sm.param_unc_num + throw( + DimensionMismatch("v must be same size as number of unconstrained parameters"), + ) + elseif length(out) != sm.param_unc_num throw( DimensionMismatch( "out must be same size as number of unconstrained parameters", @@ -687,7 +710,7 @@ function log_density_hessian_vector_product( propto::Bool = true, jacobian::Bool = true, ) - out = zeros(param_unc_num(sm)) + out = zeros(sm.param_unc_num) log_density_hessian_vector_product!(sm, q, v, out; propto = propto, jacobian = jacobian) end diff --git a/julia/test/model_tests.jl b/julia/test/model_tests.jl index e923abc9..d778ab67 100644 --- a/julia/test/model_tests.jl +++ b/julia/test/model_tests.jl @@ -177,6 +177,7 @@ end model2 = load_test_model("full", false) + a = randn(BridgeStan.param_unc_num(model2)) rng = StanRNG(model2, 1234) @test 1 == length(BridgeStan.param_constrain(model2, a)) @test 2 == length(BridgeStan.param_constrain(model2, a; include_tp = true)) @@ -392,6 +393,12 @@ end jacobian = true, ) + y_unc_bad = zeros(length(y_unc) + 1) + @test_throws DimensionMismatch BridgeStan.log_density_gradient(model, y_unc_bad) + + y_unc_bad = zeros(length(y_unc) - 1) + @test_throws DimensionMismatch BridgeStan.log_density_gradient(model, y_unc_bad) + end @testset "log_density_hessian" begin @@ -473,6 +480,13 @@ end jacobian = true, ) + + y_unc_bad = zeros(length(y_unc) + 1) + @test_throws DimensionMismatch BridgeStan.log_density_hessian(model, y_unc_bad) + + y_unc_bad = zeros(length(y_unc) - 1) + @test_throws DimensionMismatch BridgeStan.log_density_hessian(model, y_unc_bad) + end end diff --git a/python/bridgestan/model.py b/python/bridgestan/model.py index fa29560e..d7c4e490 100644 --- a/python/bridgestan/model.py +++ b/python/bridgestan/model.py @@ -198,6 +198,12 @@ def __init__( num_params = self._param_unc_num(self.model) + param_sized_array = array_ptr( + dtype=ctypes.c_double, + flags=("C_CONTIGUOUS",), + shape=(num_params,), + ) + param_sized_out_array = array_ptr( dtype=ctypes.c_double, flags=("C_CONTIGUOUS", "WRITEABLE"), @@ -227,7 +233,7 @@ def __init__( ctypes.c_void_p, ctypes.c_bool, ctypes.c_bool, - double_array, + param_sized_array, writeable_double_array, ctypes.c_void_p, star_star_char, @@ -257,7 +263,7 @@ def __init__( ctypes.c_void_p, ctypes.c_bool, ctypes.c_bool, - double_array, + param_sized_array, ctypes.POINTER(ctypes.c_double), star_star_char, ] @@ -268,7 +274,7 @@ def __init__( ctypes.c_void_p, ctypes.c_bool, ctypes.c_bool, - double_array, + param_sized_array, ctypes.POINTER(ctypes.c_double), param_sized_out_array, star_star_char, @@ -280,7 +286,7 @@ def __init__( ctypes.c_void_p, ctypes.c_bool, ctypes.c_bool, - double_array, + param_sized_array, ctypes.POINTER(ctypes.c_double), param_sized_out_array, param_sqrd_sized_out_array, @@ -293,8 +299,8 @@ def __init__( ctypes.c_void_p, ctypes.c_bool, ctypes.c_bool, - double_array, - double_array, + param_sized_array, + param_sized_array, ctypes.POINTER(ctypes.c_double), param_sized_out_array, star_star_char, diff --git a/python/test/test_stanmodel.py b/python/test/test_stanmodel.py index 27d59a7c..48e1de91 100644 --- a/python/test/test_stanmodel.py +++ b/python/test/test_stanmodel.py @@ -193,8 +193,8 @@ def test_param_constrain(): bridge = bs.StanModel(fr_gaussian_so, fr_gaussian_data) D = 4 - unc_size = 10 - a = np.random.normal(size=unc_size) + unc_size = bridge.param_unc_num() + a = np.random.normal(size=(unc_size,)) B_expected = cov_constrain(a, D) b = bridge.param_constrain(a, include_tp=False, include_gq=False) @@ -213,9 +213,19 @@ def test_param_constrain(): B = b.reshape(D, D) np.testing.assert_allclose(B_expected, B) + # out tests, matched and mismatched + scratch = np.zeros(16) + b = bridge.param_constrain(a, out=scratch) + B = b.reshape(D, D) + np.testing.assert_allclose(B_expected, B) + scratch_wrong = np.zeros(10) + with pytest.raises(ValueError): + bridge.param_constrain(a, out=scratch_wrong) + full_so = STAN_FOLDER / "full" / "full_model.so" bridge2 = bs.StanModel(full_so) rng = bridge2.new_rng(seed=1234) + a = np.random.normal(size=bridge2.param_unc_num()) np.testing.assert_equal(1, bridge2.param_constrain(a).size) np.testing.assert_equal(2, bridge2.param_constrain(a, include_tp=True).size) @@ -236,20 +246,11 @@ def test_param_constrain(): with pytest.raises(ValueError): bridge2.param_constrain(a, include_gq=True) - # out tests, matched and mismatched - scratch = np.zeros(16) - b = bridge.param_constrain(a, out=scratch) - B = b.reshape(D, D) - np.testing.assert_allclose(B_expected, B) - scratch_wrong = np.zeros(10) - with pytest.raises(ValueError): - bridge.param_constrain(a, out=scratch_wrong) - # exception handling test in transformed parameters/model (compiled same way) throw_tp_so = STAN_FOLDER / "throw_tp" / "throw_tp_model.so" bridge2 = bs.StanModel(throw_tp_so) - y = np.array(np.random.uniform(1)) + y = np.random.uniform(size=(1,)) bridge2.param_constrain(y, include_tp=False) with pytest.raises(RuntimeError, match="find this text: tpfails"): bridge2.param_constrain(y, include_tp=True) @@ -318,20 +319,20 @@ def test_log_density(): bridge = bs.StanModel(bernoulli_so, bernoulli_data) y = np.asarray([0, 1, 0, 0, 0, 0, 0, 0, 0, 1]) for _ in range(2): - x = np.random.uniform(size=bridge.param_unc_num()) + x = np.random.uniform(size=(bridge.param_unc_num(),)) x_unc = np.log(x / (1 - x)) - lp = bridge.log_density(np.array([x_unc]), propto=False, jacobian=False) + lp = bridge.log_density(x_unc, propto=False, jacobian=False) np.testing.assert_allclose(lp, _bernoulli(y, x)) - lp2 = bridge.log_density(np.array([x_unc]), propto=False, jacobian=True) + lp2 = bridge.log_density(x_unc, propto=False, jacobian=True) np.testing.assert_allclose(lp2, _bernoulli_jacobian(y, x)) - lp3 = bridge.log_density(np.array([x_unc]), propto=True, jacobian=True) + lp3 = bridge.log_density(x_unc, propto=True, jacobian=True) np.testing.assert_allclose(lp3, _bernoulli_jacobian(y, x)) - lp4 = bridge.log_density(np.array([x_unc]), propto=True, jacobian=False) + lp4 = bridge.log_density(x_unc, propto=True, jacobian=False) np.testing.assert_allclose(lp4, _bernoulli(y, x)) throw_lp_so = STAN_FOLDER / "throw_lp" / "throw_lp_model.so" bridge2 = bs.StanModel(throw_lp_so) - y2 = np.array(np.random.uniform(1)) + y2 = np.array([np.random.uniform(1)]) with pytest.raises(RuntimeError, match="find this text: lpfails"): bridge2.log_density(y2) @@ -363,7 +364,7 @@ def _grad_jacobian_true(y_unc): y = np.abs(np.random.normal(1)) y_unc = np.log(y) - y_unc_arr = np.array(y_unc) + y_unc_arr = np.array([y_unc]) logdensity, grad = bridge.log_density_gradient( y_unc_arr, propto=True, jacobian=True ) @@ -405,6 +406,14 @@ def _grad_jacobian_true(y_unc): with pytest.raises(ctypes.ArgumentError): bridge.log_density_gradient(y_unc, out=scratch_bad) + y_unc_wrong = np.zeros(y_unc.size + 1) + with pytest.raises(ctypes.ArgumentError): + bridge.log_density_gradient(y_unc_wrong) + + y_unc_wrong = np.zeros(y_unc.size - 1) + with pytest.raises(ctypes.ArgumentError): + bridge.log_density_gradient(y_unc_wrong) + def test_log_density_hessian(): def _logp(y_unc): @@ -443,7 +452,7 @@ def _hess_jacobian_true(y_unc): # test value, gradient, hessian, all combos +/- propto, +/- jacobian y = np.abs(np.random.normal(1)) y_unc = np.log(y) - y_unc_arr = np.array(y_unc) + y_unc_arr = np.array([y_unc]) logdensity, grad, hess = bridge.log_density_hessian( y_unc_arr, propto=True, jacobian=True ) @@ -511,6 +520,14 @@ def _hess_jacobian_true(y_unc): with pytest.raises(ctypes.ArgumentError): bridge.log_density_hessian(y_unc, out_grad=scratch_bad) + y_unc_wrong = np.zeros(y_unc.size + 1) + with pytest.raises(ctypes.ArgumentError): + bridge.log_density_hessian(y_unc_wrong) + + y_unc_wrong = np.zeros(y_unc.size - 1) + with pytest.raises(ctypes.ArgumentError): + bridge.log_density_hessian(y_unc_wrong) + # test with 5 x 5 Hessian simple_so = STAN_FOLDER / "simple" / "simple_model.so" simple_data = STAN_FOLDER / "simple" / "simple.data.json"