
Add GrayBox predictor #96

Merged · 7 commits · Aug 29, 2024
5 changes: 5 additions & 0 deletions docs/src/api.md
@@ -46,6 +46,11 @@ Affine
BinaryDecisionTree
```

## `GrayBox`
```@docs
GrayBox
```

## `Pipeline`
```@docs
Pipeline
1 change: 1 addition & 0 deletions docs/src/manual/predictors.md
@@ -14,6 +14,7 @@ The following predictors are supported. See their docstrings for details:
| :----------------- | :------------------------------------- | :--------- |
| [`Affine`](@ref) | $f(x) = Ax + b$ | $M \rightarrow N$ |
| [`BinaryDecisionTree`](@ref) | A binary decision tree | $M \rightarrow 1$ |
| [`GrayBox`](@ref) | $f(x)$ | $M \rightarrow N$ |
| [`Pipeline`](@ref) | $f(x) = (l_1 \circ \ldots \circ l_N)(x)$ | $M \rightarrow N$ |
| [`Quantile`](@ref) | The quantiles of a distribution | $M \rightarrow N$ |
| [`ReLU`](@ref) | $f(x) = \max.(0, x)$ | $M \rightarrow M$ |
27 changes: 26 additions & 1 deletion ext/MathOptAIFluxExt.jl
@@ -17,6 +17,7 @@ import MathOptAI
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)

Add a trained neural network from Flux.jl to `model`.
@@ -40,6 +41,8 @@ Add a trained neural network from Flux.jl to `model`.
[`AbstractPredictor`](@ref)s that control how the activation functions are
reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
`Flux.relu => MathOptAI.QuadraticReLU()`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `Flux.withjacobian`.

## Example

@@ -68,8 +71,9 @@ function MathOptAI.add_predictor(
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)
inner_predictor = MathOptAI.build_predictor(predictor; config)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
@@ -80,6 +84,7 @@ end
MathOptAI.build_predictor(
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)

Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
@@ -103,6 +108,8 @@ Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
[`AbstractPredictor`](@ref)s that control how the activation functions are
reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
`Flux.relu => MathOptAI.QuadraticReLU()`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `Flux.withjacobian`.

## Example

@@ -133,14 +140,32 @@ Pipeline with layers:
function MathOptAI.build_predictor(
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
end
inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
for layer in predictor.layers
_add_predictor(inner_predictor, layer, config)
end
return inner_predictor
end

function MathOptAI.GrayBox(predictor::Flux.Chain)
function output_size(x)
return only(Flux.outputsize(predictor, (length(x),)))
end
function with_jacobian(x)
ret = Flux.withjacobian(x -> predictor(Float32.(x)), collect(x))

[Reviewer] Will the Flux model always use Float32?

[Author] I think it's the default. Different precision handling is a bit all
over the place, since if you throw an `x::Vector{Float64}` in, it
automatically changes your weights to the same type. I dislike this aspect of
Flux. At the very least, let's wait until someone complains before addressing
this. It works for the tests.

return (value = ret.val, jacobian = only(ret.grad))
end
return MathOptAI.GrayBox(output_size, with_jacobian)
end
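As a side note on the `Float32.(x)` conversion discussed in the thread above,
here is a minimal sketch (not part of this diff, and assuming Flux's default
initialization) of why the callback converts its input:

```julia
# Sketch only: Flux layers store Float32 parameters by default, while JuMP
# evaluates user-defined operators with Float64 inputs, so the callback
# converts before calling the chain.
using Flux

chain = Flux.Chain(Flux.Dense(2 => 1))
@show eltype(first(chain.layers).weight)  # Float32 under the default init
x = [0.5, 0.5]                            # Float64, as supplied by JuMP
y = chain(Float32.(x))                    # convert to match the weights
```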

function _add_predictor(::MathOptAI.Pipeline, layer::Any, ::Dict)
return error("Unsupported layer: $layer")
end
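To round out this file's changes, a short usage sketch of the new keyword. It
mirrors the `test_gray_box_scalar_output` test added below; `Ipopt` is an
assumption here, used only because the tests use it:

```julia
using Flux, JuMP, Ipopt
import MathOptAI

chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
model = Model(Ipopt.Optimizer)
@variable(model, 0 <= x[1:2] <= 1)
# With `gray_box = true`, the chain is added as a user-defined nonlinear
# operator instead of a layer-by-layer algebraic reformulation.
y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
@objective(model, Max, only(y))
optimize!(model)
```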
35 changes: 34 additions & 1 deletion ext/MathOptAIPythonCallExt.jl
@@ -17,6 +17,7 @@ import MathOptAI
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)

Add a trained neural network from Pytorch via PythonCall.jl to `model`.
@@ -35,15 +36,18 @@ Add a trained neural network from Pytorch via PythonCall.jl to `model`.
that control how the activation functions are reformulated. For example,
`:Sigmoid => MathOptAI.Sigmoid()` or `:ReLU => MathOptAI.QuadraticReLU()`.
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
"""
function MathOptAI.add_predictor(
model::JuMP.AbstractModel,
predictor::MathOptAI.PytorchModel,
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)
inner_predictor = MathOptAI.build_predictor(predictor; config)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
@@ -54,6 +58,7 @@ end
MathOptAI.build_predictor(
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)

Convert a trained neural network from Pytorch via PythonCall.jl to a
@@ -73,11 +78,20 @@ Convert a trained neural network from Pytorch via PythonCall.jl to a
that control how the activation functions are reformulated. For example,
`:Sigmoid => MathOptAI.Sigmoid()` or `:ReLU => MathOptAI.QuadraticReLU()`.
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
"""
function MathOptAI.build_predictor(
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
end
torch = PythonCall.pyimport("torch")
nn = PythonCall.pyimport("torch.nn")
torch_model = torch.load(predictor.filename)
@@ -104,4 +118,23 @@ function _predictor(nn, layer, config)
return error("unsupported layer: $layer")
end

function MathOptAI.GrayBox(predictor::MathOptAI.PytorchModel)
torch = PythonCall.pyimport("torch")
torch_model = torch.load(predictor.filename)
J = torch.func.jacrev(torch_model)
# TODO(odow): I'm not sure if there is a better way to get the output
# dimension of a torch model object?
output_size(::Any) = PythonCall.pyconvert(Int, torch_model[-1].out_features)
function with_jacobian(x)
py_x = torch.tensor(collect(x))
py_value = torch_model(py_x).detach().numpy()
py_jacobian = J(py_x).detach().numpy()
return (;
value = PythonCall.pyconvert(Vector, py_value),
jacobian = PythonCall.pyconvert(Matrix, py_jacobian),
)
end
return MathOptAI.GrayBox(output_size, with_jacobian)
end

end # module
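A parallel usage sketch for the PyTorch path. The file name `model.pt` is
hypothetical (a model saved with `torch.save(model, "model.pt")`), and
PythonCall plus a Python environment with torch are assumed:

```julia
using JuMP
import MathOptAI

model = Model()
@variable(model, x[1:2])
predictor = MathOptAI.PytorchModel("model.pt")  # hypothetical saved model
# Values and Jacobians are computed in Python via `torch.func.jacrev`.
y = MathOptAI.add_predictor(model, predictor, x; gray_box = true)
```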
100 changes: 100 additions & 0 deletions src/predictors/GrayBox.jl
@@ -0,0 +1,100 @@
# Copyright (c) 2024: Oscar Dowson and contributors
# Copyright (c) 2024: Triad National Security, LLC
#
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE.md file.

"""
GrayBox(
output_size::Function,
with_jacobian::Function,
) <: AbstractPredictor

An [`AbstractPredictor`](@ref) that represents the function ``f(x)`` as a
user-defined nonlinear operator.

## Arguments

* `output_size(x::Vector)::Int`: given an input vector `x`, return the
  dimension of the output vector.
* `with_jacobian(x::Vector)::NamedTuple`: given an input vector `x`, return a
  `NamedTuple` with fields `(; value, jacobian)` containing the primal value
  and the Jacobian of the output with respect to the input. `jacobian[j, i]`
  is the partial derivative of `value[j]` with respect to `x[i]`.

## Example

```jldoctest; filter=r"##[0-9]+"
julia> using JuMP, MathOptAI

julia> model = Model();

julia> @variable(model, x[1:2]);

julia> f = MathOptAI.GrayBox(
x -> 2,
x -> (value = x.^2, jacobian = [2 * x[1] 0.0; 0.0 2 * x[2]]),
);

julia> y = MathOptAI.add_predictor(model, f, x)
2-element Vector{VariableRef}:
moai_GrayBox[1]
moai_GrayBox[2]

julia> print(model)
Feasibility
Subject to
op_##238(x[1], x[2]) - moai_GrayBox[1] = 0
op_##239(x[1], x[2]) - moai_GrayBox[2] = 0

julia> y = MathOptAI.add_predictor(model, MathOptAI.ReducedSpace(f), x)
2-element Vector{NonlinearExpr}:
op_##240(x[1], x[2])
op_##241(x[1], x[2])
```
"""
struct GrayBox{F<:Function,G<:Function} <: AbstractPredictor
output_size::F
with_jacobian::G
end

function add_predictor(model::JuMP.AbstractModel, predictor::GrayBox, x::Vector)
op = add_predictor(model, ReducedSpace(predictor), x)
y = JuMP.@variable(model, [1:length(op)], base_name = "moai_GrayBox")
JuMP.@constraint(model, op .== y)
return y
end

function add_predictor(
model::JuMP.AbstractModel,
predictor::ReducedSpace{<:GrayBox},
x::Vector,
)
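# Memoize the most recent evaluation point: `f` and `∇f` below are called
# once per output index, but `with_jacobian` needs to run only once per point.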
last_x, cache = nothing, nothing
function update(x)
if x != last_x
cache = predictor.predictor.with_jacobian(x)
last_x = x
end
return
end
function f(i::Int, x...)::Float64
update(x)
return cache.value[i]
end
function ∇f(g::AbstractVector{Float64}, i::Int, x...)
update(x)
g .= cache.jacobian[i, :]
return
end
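# Register one scalar nonlinear operator per output dimension; all of them
# share the memoized `cache` above.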
return map(1:predictor.predictor.output_size(x)) do i
op_i = JuMP.add_nonlinear_operator(
model,
length(x),
(x...) -> f(i, x...),
(g, x...) -> ∇f(g, i, x...);
name = Symbol("op_$(gensym())"),
)
return op_i(x...)
Comment on lines +90 to +98

[Reviewer] Would it be possible to add a Hessian function?

[Author] Do people compute Hessians of NNs? Or do you just want the arbitrary
possibility? Do you have an example where this is useful?

[Author] My sense is to leave as-is for the first pass. We can always add it
later.

[Reviewer] In a paper I am about to submit, we used Hessians with a PyTorch NN
in an optimal control problem and saw a significant speed-up. This was done
with PyNumero's gray-box interface.

[Author] Can you link me the code for getting Hessians etc. out of torch?

[Reviewer] I'll dig it up from my former student who just graduated. In the
meantime, I know that we used torch.func, which provides functions to evaluate
the Jacobian and the Hessian directly:
https://pytorch.org/docs/stable/func.api.html

[Author] I also found `jacobian = torch.autograd.functional.jacobian(model, x)`.
But `func` seems better.

[Reviewer] We tried torch.autograd.functional, but it was quite a bit slower.
Notably, we leveraged the batch abilities of torch.func to evaluate all the
gradients of a NN over different sets of inputs, which probably gave
torch.func an extra advantage.
end
end
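For reference on the thread above, a hedged sketch (not part of this PR) of
what a Hessian oracle via `torch.func.hessian`, from the API the discussion
links to, might look like. The function `hessian_oracle` is hypothetical and
mirrors the Jacobian code in MathOptAIPythonCallExt.jl:

```julia
# Hypothetical sketch only — not part of this diff.
using PythonCall

function hessian_oracle(filename::String)
    torch = PythonCall.pyimport("torch")
    torch_model = torch.load(filename)
    H = torch.func.hessian(torch_model)
    return function (x::Vector{Float64})
        py_x = torch.tensor(collect(x))
        py_h = H(py_x).detach().numpy()
        # For an output of length m and an input of length n, `py_h` has
        # shape (m, n, n).
        return PythonCall.pyconvert(Array, py_h)
    end
end
```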
50 changes: 50 additions & 0 deletions test/test_Flux.jl
@@ -201,6 +201,56 @@ function test_unsupported_layer()
return
end

function test_gray_box_scalar_output()
chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:2] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
reduced_space = true,
)
@objective(model, Max, only(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
@test y isa Vector{VariableRef}
config = Dict(Flux.relu => MathOptAI.ReLU())
@test_throws(
ErrorException(
"cannot specify the `config` kwarg if `gray_box = true`",
),
MathOptAI.add_predictor(model, chain, x; gray_box = true, config),
)
return
end

function test_gray_box_vector_output()
chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:3] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
reduced_space = true,
)
@test length(y) == 2
@objective(model, Max, sum(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
return
end

end # module

TestFluxExt.runtests()