
Add GrayBox predictor #96

Merged · 7 commits · Aug 29, 2024
5 changes: 5 additions & 0 deletions docs/src/api.md
@@ -46,6 +46,11 @@ Affine
BinaryDecisionTree
```

## `GrayBox`
```@docs
GrayBox
```

## `Pipeline`
```@docs
Pipeline
1 change: 1 addition & 0 deletions docs/src/manual/predictors.md
@@ -14,6 +14,7 @@ The following predictors are supported. See their docstrings for details:
| :----------------- | :------------------------------------- | :--------- |
| [`Affine`](@ref) | $f(x) = Ax + b$ | $M \rightarrow N$ |
| [`BinaryDecisionTree`](@ref) | A binary decision tree | $M \rightarrow 1$ |
| [`GrayBox`](@ref) | $f(x)$ | $M \rightarrow N$ |
| [`Pipeline`](@ref) | $f(x) = (l_1 \circ \ldots \circ l_N)(x)$ | $M \rightarrow N$ |
| [`Quantile`](@ref) | The quantiles of a distribution | $M \rightarrow N$ |
| [`ReLU`](@ref) | $f(x) = \max.(0, x)$ | $M \rightarrow M$ |
27 changes: 26 additions & 1 deletion ext/MathOptAIFluxExt.jl
@@ -17,6 +17,7 @@ import MathOptAI
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)

Add a trained neural network from Flux.jl to `model`.
@@ -40,6 +41,8 @@ Add a trained neural network from Flux.jl to `model`.
[`AbstractPredictor`](@ref)s that control how the activation functions are
reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
`Flux.relu => MathOptAI.QuadraticReLU()`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `Flux.withjacobian`.

## Example

@@ -68,8 +71,9 @@ function MathOptAI.add_predictor(
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)
inner_predictor = MathOptAI.build_predictor(predictor; config)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
@@ -80,6 +84,7 @@ end
MathOptAI.build_predictor(
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)

Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
@@ -103,6 +108,8 @@ Convert a trained neural network from Flux.jl to a [`Pipeline`](@ref).
[`AbstractPredictor`](@ref)s that control how the activation functions are
reformulated. For example, `Flux.sigmoid => MathOptAI.Sigmoid()` or
`Flux.relu => MathOptAI.QuadraticReLU()`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `Flux.withjacobian`.

## Example

@@ -133,14 +140,32 @@ Pipeline with layers:
function MathOptAI.build_predictor(
predictor::Flux.Chain;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
end
inner_predictor = MathOptAI.Pipeline(MathOptAI.AbstractPredictor[])
for layer in predictor.layers
_add_predictor(inner_predictor, layer, config)
end
return inner_predictor
end

function MathOptAI.GrayBox(predictor::Flux.Chain)
function output_size(x)
return only(Flux.outputsize(predictor, (length(x),)))
end
function with_jacobian(x)
ret = Flux.withjacobian(x -> predictor(Float32.(x)), collect(x))

[Reviewer] Will the Flux model always use Float32?

[Author] I think it's the default. Different precision handling is a bit all
over the place, since if you throw an `x::Vector{Float64}` in, it
automatically changes your weights to the same type. I dislike this aspect of
Flux. At the very least, let's wait until someone complains before addressing
this. It works for the tests.

return (value = ret.val, jacobian = only(ret.grad))
end
return MathOptAI.GrayBox(output_size, with_jacobian)
end
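As a side note on the `Float32.(x)` conversion discussed in the thread above,
here is a minimal sketch (not part of this diff, and assuming Flux's default
initialization) of why the callback converts its input:

```julia
# Sketch only: Flux layers store Float32 parameters by default, while JuMP
# evaluates user-defined operators with Float64 inputs, so the callback
# converts before calling the chain.
using Flux

chain = Flux.Chain(Flux.Dense(2 => 1))
@show eltype(first(chain.layers).weight)  # Float32 under the default init
x = [0.5, 0.5]                            # Float64, as supplied by JuMP
y = chain(Float32.(x))                    # convert to match the weights
```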

function _add_predictor(::MathOptAI.Pipeline, layer::Any, ::Dict)
return error("Unsupported layer: $layer")
end
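To round out this file's changes, a short usage sketch of the new keyword. It
mirrors the `test_gray_box_scalar_output` test added below; `Ipopt` is an
assumption here, used only because the tests use it:

```julia
using Flux, JuMP, Ipopt
import MathOptAI

chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
model = Model(Ipopt.Optimizer)
@variable(model, 0 <= x[1:2] <= 1)
# With `gray_box = true`, the chain is added as a user-defined nonlinear
# operator instead of a layer-by-layer algebraic reformulation.
y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
@objective(model, Max, only(y))
optimize!(model)
```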
35 changes: 34 additions & 1 deletion ext/MathOptAIPythonCallExt.jl
@@ -17,6 +17,7 @@ import MathOptAI
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)

Add a trained neural network from Pytorch via PythonCall.jl to `model`.
@@ -35,15 +36,18 @@ Add a trained neural network from Pytorch via PythonCall.jl to `model`.
that control how the activation functions are reformulated. For example,
`:Sigmoid => MathOptAI.Sigmoid()` or `:ReLU => MathOptAI.QuadraticReLU()`.
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
"""
function MathOptAI.add_predictor(
model::JuMP.AbstractModel,
predictor::MathOptAI.PytorchModel,
x::Vector;
config::Dict = Dict{Any,Any}(),
reduced_space::Bool = false,
gray_box::Bool = false,
)
inner_predictor = MathOptAI.build_predictor(predictor; config)
inner_predictor = MathOptAI.build_predictor(predictor; config, gray_box)
if reduced_space
inner_predictor = MathOptAI.ReducedSpace(inner_predictor)
end
@@ -54,6 +58,7 @@ end
MathOptAI.build_predictor(
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)

Convert a trained neural network from Pytorch via PythonCall.jl to a
@@ -73,11 +78,20 @@ Convert a trained neural network from Pytorch via PythonCall.jl to a
that control how the activation functions are reformulated. For example,
`:Sigmoid => MathOptAI.Sigmoid()` or `:ReLU => MathOptAI.QuadraticReLU()`.
The supported Symbols are `:ReLU`, `:Sigmoid`, and `:Tanh`.
* `gray_box`: if `true`, the neural network is added as a user-defined
nonlinear operator, with gradients provided by `torch.func.jacrev`.
"""
function MathOptAI.build_predictor(
predictor::MathOptAI.PytorchModel;
config::Dict = Dict{Any,Any}(),
gray_box::Bool = false,
)
if gray_box
if !isempty(config)
error("cannot specify the `config` kwarg if `gray_box = true`")
end
return MathOptAI.GrayBox(predictor)
end
torch = PythonCall.pyimport("torch")
nn = PythonCall.pyimport("torch.nn")
torch_model = torch.load(predictor.filename)
@@ -104,4 +118,23 @@ function _predictor(nn, layer, config)
return error("unsupported layer: $layer")
end

function MathOptAI.GrayBox(predictor::MathOptAI.PytorchModel)
torch = PythonCall.pyimport("torch")
torch_model = torch.load(predictor.filename)
J = torch.func.jacrev(torch_model)
# TODO(odow): I'm not sure if there is a better way to get the output
# dimension of a torch model object?
output_size(::Any) = PythonCall.pyconvert(Int, torch_model[-1].out_features)
function with_jacobian(x)
py_x = torch.tensor(collect(x))
py_value = torch_model(py_x).detach().numpy()
py_jacobian = J(py_x).detach().numpy()
return (;
value = PythonCall.pyconvert(Vector, py_value),
jacobian = PythonCall.pyconvert(Matrix, py_jacobian),
)
end
return MathOptAI.GrayBox(output_size, with_jacobian)
end

end # module
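A parallel usage sketch for the PyTorch path. The file name `model.pt` is
hypothetical (a model saved with `torch.save(model, "model.pt")`), and
PythonCall plus a Python environment with torch are assumed:

```julia
using JuMP
import MathOptAI

model = Model()
@variable(model, x[1:2])
predictor = MathOptAI.PytorchModel("model.pt")  # hypothetical saved model
# Values and Jacobians are computed in Python via `torch.func.jacrev`.
y = MathOptAI.add_predictor(model, predictor, x; gray_box = true)
```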
100 changes: 100 additions & 0 deletions src/predictors/GrayBox.jl
@@ -0,0 +1,100 @@
# Copyright (c) 2024: Oscar Dowson and contributors
# Copyright (c) 2024: Triad National Security, LLC
#
# Use of this source code is governed by a BSD-style license that can be found
# in the LICENSE.md file.

"""
GrayBox(
output_size::Function,
with_jacobian::Function,
) <: AbstractPredictor

An [`AbstractPredictor`](@ref) that represents the function ``f(x)`` as a
user-defined nonlinear operator.

## Arguments

* `output_size(x::Vector)::Int`: given an input vector `x`, return the
  dimension of the output vector.
* `with_jacobian(x::Vector)::NamedTuple`: given an input vector `x`, return a
  `NamedTuple` with fields `(; value, jacobian)` containing the primal value
  and the Jacobian of the output with respect to the input. `jacobian[j, i]`
  is the partial derivative of `value[j]` with respect to `x[i]`.

## Example

```jldoctest; filter=r"##[0-9]+"
julia> using JuMP, MathOptAI

julia> model = Model();

julia> @variable(model, x[1:2]);

julia> f = MathOptAI.GrayBox(
x -> 2,
x -> (value = x.^2, jacobian = [2 * x[1] 0.0; 0.0 2 * x[2]]),
);

julia> y = MathOptAI.add_predictor(model, f, x)
2-element Vector{VariableRef}:
moai_GrayBox[1]
moai_GrayBox[2]

julia> print(model)
Feasibility
Subject to
op_##238(x[1], x[2]) - moai_GrayBox[1] = 0
op_##239(x[1], x[2]) - moai_GrayBox[2] = 0

julia> y = MathOptAI.add_predictor(model, MathOptAI.ReducedSpace(f), x)
2-element Vector{NonlinearExpr}:
op_##240(x[1], x[2])
op_##241(x[1], x[2])
```
"""
struct GrayBox{F<:Function,G<:Function} <: AbstractPredictor
output_size::F
with_jacobian::G
end

function add_predictor(model::JuMP.AbstractModel, predictor::GrayBox, x::Vector)
op = add_predictor(model, ReducedSpace(predictor), x)
y = JuMP.@variable(model, [1:length(op)], base_name = "moai_GrayBox")
JuMP.@constraint(model, op .== y)
return y
end

function add_predictor(
model::JuMP.AbstractModel,
predictor::ReducedSpace{<:GrayBox},
x::Vector,
)
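# Memoize the most recent evaluation point: `f` and `∇f` below are called
# once per output index, but `with_jacobian` needs to run only once per point.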
last_x, cache = nothing, nothing
function update(x)
if x != last_x
cache = predictor.predictor.with_jacobian(x)
last_x = x
end
return
end
function f(i::Int, x...)::Float64
update(x)
return cache.value[i]
end
function ∇f(g::AbstractVector{Float64}, i::Int, x...)
update(x)
g .= cache.jacobian[i, :]
return
end
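# Register one scalar nonlinear operator per output dimension; all of them
# share the memoized `cache` above.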
return map(1:predictor.predictor.output_size(x)) do i
op_i = JuMP.add_nonlinear_operator(
model,
length(x),
(x...) -> f(i, x...),
(g, x...) -> ∇f(g, i, x...);
name = Symbol("op_$(gensym())"),
)
return op_i(x...)
Comment on lines +90 to +98

[Reviewer] Would it be possible to add a Hessian function?

[Author] Do people compute Hessians of NNs? Or do you just want the arbitrary
possibility? Do you have an example where this is useful?

[Author] My sense is to leave as-is for the first pass. We can always add it
later.

[Reviewer] In a paper I am about to submit, we used Hessians with a PyTorch NN
in an optimal control problem and saw a significant speed-up. This was done
with PyNumero's gray-box interface.

[Author] Can you link me the code for getting Hessians etc. out of torch?

[Reviewer] I'll dig it up from my former student who just graduated. In the
meantime, I know that we used torch.func, which provides functions to evaluate
the Jacobian and the Hessian directly:
https://pytorch.org/docs/stable/func.api.html

[Author] I also found `jacobian = torch.autograd.functional.jacobian(model, x)`.
But `func` seems better.

[Reviewer] We tried torch.autograd.functional, but it was quite a bit slower.
Notably, we leveraged the batch abilities of torch.func to evaluate all the
gradients of a NN over different sets of inputs, which probably gave
torch.func an extra advantage.
end
end
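For reference on the thread above, a hedged sketch (not part of this PR) of
what a Hessian oracle via `torch.func.hessian`, from the API the discussion
links to, might look like. The function `hessian_oracle` is hypothetical and
mirrors the Jacobian code in MathOptAIPythonCallExt.jl:

```julia
# Hypothetical sketch only — not part of this diff.
using PythonCall

function hessian_oracle(filename::String)
    torch = PythonCall.pyimport("torch")
    torch_model = torch.load(filename)
    H = torch.func.hessian(torch_model)
    return function (x::Vector{Float64})
        py_x = torch.tensor(collect(x))
        py_h = H(py_x).detach().numpy()
        # For an output of length m and an input of length n, `py_h` has
        # shape (m, n, n).
        return PythonCall.pyconvert(Array, py_h)
    end
end
```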
50 changes: 50 additions & 0 deletions test/test_Flux.jl
@@ -201,6 +201,56 @@ function test_unsupported_layer()
return
end

function test_gray_box_scalar_output()
chain = Flux.Chain(Flux.Dense(2 => 16, Flux.relu), Flux.Dense(16 => 1))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:2] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
reduced_space = true,
)
@objective(model, Max, only(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
y = MathOptAI.add_predictor(model, chain, x; gray_box = true)
@test y isa Vector{VariableRef}
config = Dict(Flux.relu => MathOptAI.ReLU())
@test_throws(
ErrorException(
"cannot specify the `config` kwarg if `gray_box = true`",
),
MathOptAI.add_predictor(model, chain, x; gray_box = true, config),
)
return
end

function test_gray_box_vector_output()
chain = Flux.Chain(Flux.Dense(3 => 16, Flux.relu), Flux.Dense(16 => 2))
model = Model(Ipopt.Optimizer)
set_silent(model)
set_attribute(model, "max_iter", 5)
@variable(model, 0 <= x[1:3] <= 1)
y = MathOptAI.add_predictor(
model,
chain,
x;
gray_box = true,
reduced_space = true,
)
@test length(y) == 2
@objective(model, Max, sum(y))
optimize!(model)
@test termination_status(model) == ITERATION_LIMIT
@test isapprox(value.(y), chain(Float32.(value.(x))); atol = 1e-2)
return
end

end # module

TestFluxExt.runtests()