From 6b9c1b632a544653446926b08b2459a1bb37afd6 Mon Sep 17 00:00:00 2001 From: DrChainsaw Date: Sun, 10 Apr 2022 14:38:17 +0200 Subject: [PATCH 1/4] Updates for Flux 0.13 mainly: Partial handling of groups in Conv due to deprecation of DepthwiseConv Flux.Diagonal => Flux.Scale --- Project.toml | 4 ++-- src/NaiveNASflux.jl | 2 +- src/constraints.jl | 42 +++++++++++++++++++++++++++++++----------- src/mutable.jl | 24 ++++++++++++++++-------- src/neuronutility.jl | 15 ++++++++++----- src/select.jl | 6 +++--- src/types.jl | 22 ++++++++++++++-------- src/util.jl | 30 ++++++++++++++++++------------ src/vertex.jl | 4 ++-- test/mutable.jl | 18 ++++++++++-------- test/neuronutility.jl | 2 +- test/runtests.jl | 7 ++----- test/util.jl | 8 ++++++++ test/vertex.jl | 18 +++++++++--------- 14 files changed, 127 insertions(+), 75 deletions(-) diff --git a/Project.toml b/Project.toml index f64566f..2e920f8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "NaiveNASflux" uuid = "85610aed-7d32-5e57-bb50-4c2e1c9e7997" -version = "2.0.4" +version = "2.0.5" [deps] Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" @@ -14,7 +14,7 @@ Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] -Flux = "0.12" +Flux = "0.13" Functors = "0.2" JuMP = "0.19, 0.20, 0.21, 0.22, 0.23, 1" NaiveNASlib = "2" diff --git a/src/NaiveNASflux.jl b/src/NaiveNASflux.jl index f38f1eb..63c4dd7 100644 --- a/src/NaiveNASflux.jl +++ b/src/NaiveNASflux.jl @@ -4,7 +4,7 @@ using Reexport @reexport using NaiveNASlib using NaiveNASlib.Extend, NaiveNASlib.Advanced import Flux -using Flux: Dense, Conv, ConvTranspose, DepthwiseConv, CrossCor, LayerNorm, BatchNorm, InstanceNorm, GroupNorm, +using Flux: Dense, Conv, ConvTranspose, CrossCor, LayerNorm, BatchNorm, InstanceNorm, GroupNorm, MaxPool, MeanPool, Dropout, AlphaDropout, GlobalMaxPool, GlobalMeanPool, cpu import Functors using Functors: @functor diff --git a/src/constraints.jl b/src/constraints.jl index 09e8500..ab161f3 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -86,10 +86,11 @@ function NaiveNASlib.compconstraint!(case, s::DecoratingJuMPΔSizeStrategy, lt:: NaiveNASlib.compconstraint!(case, NaiveNASlib.base(s), lt, data) end # To avoid ambiguity -function NaiveNASlib.compconstraint!(case::NaiveNASlib.ScalarSize, s::DecoratingJuMPΔSizeStrategy, lt::FluxDepthwiseConv, data) +function NaiveNASlib.compconstraint!(case::NaiveNASlib.ScalarSize, s::DecoratingJuMPΔSizeStrategy, lt::FluxConvolutional, data) NaiveNASlib.compconstraint!(case, NaiveNASlib.base(s), lt, data) end -function NaiveNASlib.compconstraint!(::NaiveNASlib.ScalarSize, s::AbstractJuMPΔSizeStrategy, ::FluxDepthwiseConv, data, ms=allowed_multipliers(s)) +function NaiveNASlib.compconstraint!(::NaiveNASlib.ScalarSize, s::AbstractJuMPΔSizeStrategy, ::FluxConvolutional, data, ms=allowed_multipliers(s)) + ngroups(data.vertex) == 1 && return # Add constraint that nout(l) == n * nin(l) where n is integer ins = filter(vin -> vin in keys(data.noutdict), inputs(data.vertex)) @@ -119,14 +120,15 @@ allowed_multipliers(s::DepthwiseConvSimpleΔSizeStrategy) = s.allowed_multiplier allowed_multipliers(::AbstractJuMPΔSizeStrategy) = 1:10 -function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::DecoratingJuMPΔSizeStrategy, t::FluxDepthwiseConv, data) +function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::DecoratingJuMPΔSizeStrategy, t::FluxConvolutional, data) NaiveNASlib.compconstraint!(case, base(s), t, data) end -function 
NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::AbstractJuMPΔSizeStrategy, t::FluxDepthwiseConv, data) +function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::AbstractJuMPΔSizeStrategy, t::FluxConvolutional, data) + ngroups(data.vertex) == 1 && return # Fallbacks don't matter here since we won't call it from below here, just add default so we don't accidentally crash due to some # strategy which hasn't defined a fallback if 15 < sum(keys(data.outselectvars)) do v - layertype(v) isa FluxDepthwiseConv || return 0 + ngroups(v) == 1 && return 0 return log2(nout(v)) # Very roughly determined... end return NaiveNASlib.compconstraint!(case, DepthwiseConvSimpleΔSizeStrategy(10, s, NaiveNASlib.DefaultJuMPΔSizeStrategy()), t, data) @@ -154,22 +156,29 @@ function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::Abstrac =# end -function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::DepthwiseConvSimpleΔSizeStrategy, t::FluxDepthwiseConv, data) +function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::DepthwiseConvSimpleΔSizeStrategy, t::FluxConvolutional, data) model = data.model v = data.vertex select = data.outselectvars[v] insert = data.outinsertvars[v] + + ngroups(v) == 1 && return nin(v)[] == 1 && return # Special case, no restrictions as we only need to be an integer multple of 1 - ngroups = div(nout(v), nin(v)[]) + if size(weights(layer(v)), indim(v)) != 1 + @warn "Handling of convolutional layers with groups != nin not implemented. Model might not be size aligned after mutation!" + end + # Neurons mapped to the same weight are interleaved, i.e layer.weight[:,:,1,:] maps to y[1:ngroups:end] where y = layer(x) - for group in 1:ngroups - neurons_in_group = select[group : ngroups : end] + ngrps = div(nout(v), nin(v)[]) + + for group in 1:ngrps + neurons_in_group = select[group : ngrps : end] @constraint(model, neurons_in_group[1] == neurons_in_group[end]) @constraint(model, [i=2:length(neurons_in_group)], neurons_in_group[i] == neurons_in_group[i-1]) - insert_in_group = insert[group : ngroups : end] + insert_in_group = insert[group : ngrps : end] @constraint(model, insert_in_group[1] == insert_in_group[end]) @constraint(model, [i=2:length(insert_in_group)], insert_in_group[i] == insert_in_group[i-1]) end @@ -177,14 +186,18 @@ function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::DepthwiseCo NaiveNASlib.compconstraint!(NaiveNASlib.ScalarSize(), s, t, data, allowed_multipliers(s)) end -function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::DepthwiseConvAllowNinChangeStrategy, t::FluxDepthwiseConv, data) +function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::DepthwiseConvAllowNinChangeStrategy, t::FluxConvolutional, data) model = data.model v = data.vertex select = data.outselectvars[v] insert = data.outinsertvars[v] + ngroups(v) == 1 && return nin(v)[] == 1 && return # Special case, no restrictions as we only need to be an integer multple of 1? + # Step 0: + # Flux 0.13 changed the grouping of weigths so that size(layer.weight) = (..., nin / ngroups, nout) + # We can get back the shape expected here through weightgroups = reshape(layer.weight, ..., nout / groups, nin) # Step 1: # Neurons mapped to the same weight are interleaved, i.e layer.weight[:,:,1,:] maps to y[1:ngroups:end] where y = layer(x) # where ngroups = nout / nin. 
For example, nout = 12 and nin = 4 mean size(layer.weight) == (..,3, 4) @@ -199,6 +212,9 @@ function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::Depthwi ininsert = data.outinsertvars[ins[]] #ngroups = div(nout(v), nin(v)[]) + if size(weights(layer(v)), indim(v)) != 1 + @warn "Handling of convolutional layers with groups != nin not implemented. Model might not be size aligned after mutation!" + end ningroups = nin(v)[] add_depthwise_constraints(model, inselect, ininsert, select, insert, ningroups, s.allowed_new_outgroups, s.allowed_multipliers) end @@ -213,6 +229,10 @@ function add_depthwise_constraints(model, inselect, ininsert, select, insert, ni # Inserting one new input element at position i will get us noutgroups new consecutive outputputs at position i # Thus nout change by Δ * noutgroups. + # Note: Flux 0.13 changed the grouping of weigths so that size(layer.weight) = (..., nin / ngroups, nout) + # We can get back the shape expected here through weightgroups = reshape(layer.weight, ..., nout / groups, nin) + # All examples below assume the pre-0.13 representation! + # Example: # dc = DepthwiseConv((1,1), 3 => 9; bias=false); diff --git a/src/mutable.jl b/src/mutable.jl index 902a1ae..4789876 100644 --- a/src/mutable.jl +++ b/src/mutable.jl @@ -63,7 +63,9 @@ function mutate(m::MutableLayer; inputs, outputs, other = l -> (), insert=neuron end end -function mutate(lt::FluxParLayer, m::MutableLayer; inputs=1:nin(m)[], outputs=1:nout(m), other= l -> (), insert=neuroninsert) +mutate(lt::FluxParLayer, m::MutableLayer; kwargs...) = _mutate(lt, m; kwargs...) + +function _mutate(lt::FluxParLayer, m::MutableLayer; inputs=1:nin(m)[], outputs=1:nout(m), other= l -> (), insert=neuroninsert) l = layer(m) otherdims = other(l) w = select(weights(l), indim(l) => inputs, outdim(l) => outputs, otherdims...; newfun=insert(lt, WeightParam())) @@ -72,19 +74,25 @@ function mutate(lt::FluxParLayer, m::MutableLayer; inputs=1:nin(m)[], outputs=1: end otherpars(o, l) = () -function mutate(lt::FluxDepthwiseConv{N}, m::MutableLayer; inputs=1:nin(m)[], outputs=1:nout(m), other= l -> (), insert=neuroninsert) where N +function mutate(lt::FluxConvolutional{N}, m::MutableLayer; inputs=1:nin(m)[], outputs=1:nout(m), other= l -> (), insert=neuroninsert) where N + + if ngroups(lt, layer(m)) == 1 + return _mutate(lt, m; inputs, outputs, other, insert) + end + l = layer(m) otherdims = other(l) - ngroups = div(length(outputs), length(inputs)) + # TODO: Handle other cases than ngroups == nin + newingroups = 1 # inputs and outputs are coupled through the constraints (which hopefully were enforced) so we only need to consider outputs currsize =size(weights(l)) wo = select(reshape(weights(l), currsize[1:N]...,:), N+1 => outputs, otherdims...; newfun=insert(lt, WeightParam())) newks = size(wo)[1:N] - w = collect(reshape(wo, newks...,ngroups, :)) + w = collect(reshape(wo, newks...,newingroups, :)) b = select(bias(l), 1 => outputs; newfun=insert(lt, BiasParam())) - newlayer(m, w, b, otherpars(other, l)) + newlayer(m, w, b, (;groups= length(inputs) ÷ newingroups, otherpars(other, l)...)) end function mutate(lt::FluxRecurrent, m::MutableLayer; inputs=1:nin(m)[], outputs=1:nout(m), other=missing, insert=neuroninsert) @@ -131,7 +139,7 @@ function mutate(t::FluxParInvLayer, m::MutableLayer; inputs=missing, outputs=mis ismissing(outputs) || return mutate(t, m, outputs; insert=insert) end -function mutate(lt::FluxDiagonal, m::MutableLayer, inds; insert=neuroninsert) +function mutate(lt::FluxScale, m::MutableLayer, 
inds; insert=neuroninsert) l = layer(m) w = select(weights(l), 1 => inds, newfun=insert(lt, WeightParam())) b = select(bias(l), 1 => inds; newfun=insert(lt, BiasParam())) @@ -139,7 +147,7 @@ function mutate(lt::FluxDiagonal, m::MutableLayer, inds; insert=neuroninsert) end function mutate(::FluxLayerNorm, m::MutableLayer, inds; insert=neuroninsert) - # LayerNorm is only a wrapped Diagonal. Just mutate the Diagonal and make a new LayerNorm of it + # LayerNorm is only a wrapped Scale. Just mutate the Scale and make a new LayerNorm of it proxy = MutableLayer(layer(m).diag) mutate(proxy; inputs=inds, outputs=inds, other=l->(), insert=insert) @@ -197,7 +205,7 @@ newlayer(m::MutableLayer, w, b, other=nothing) = m.layer = newlayer(layertype(m) newlayer(::FluxDense, m::MutableLayer, w, b, other) = Dense(w, b, deepcopy(layer(m).σ)) newlayer(::FluxConvolutional, m::MutableLayer, w, b, other) = setproperties(layer(m), (weight=w, bias=b, σ=deepcopy(layer(m).σ), other...)) -newlayer(::FluxDiagonal, m::MutableLayer, w, b, other) = Flux.Diagonal(w, b) +newlayer(::FluxScale, m::MutableLayer, w, b, other) = Flux.Scale(w, b) """ diff --git a/src/neuronutility.jl b/src/neuronutility.jl index 6dfece2..be94a76 100644 --- a/src/neuronutility.jl +++ b/src/neuronutility.jl @@ -62,7 +62,7 @@ function l2_squeeze(x, dimskeep=1:ndims(x)) dims = filter(i -> i ∉ dimskeep, 1:ndims(x)) return sqrt.(dropdims(sum(x -> x^2, x, dims=dims), dims=Tuple(dims))) end -l2_squeeze(z::Flux.Zeros, args...) = z +l2_squeeze(z::Number, args...) = z """ mean_squeeze(f, x, dimkeep) @@ -90,14 +90,19 @@ neuronutility(l) = neuronutility(layertype(l), l) # Default: mean of abs of weights + bias. Not a very good metric, but should be better than random # Maybe do something about state in recurrent layers as well, but CBA to do it right now neuronutility(::FluxParLayer, l) = l2_squeeze(weights(l), outdim(l)) .+ l2_squeeze(bias(l)) -function neuronutility(::FluxDepthwiseConv, l) - wm = l2_squeeze(weights(l), outdim(l)) +function neuronutility(::FluxConvolutional{N}, l) where N + ngroups(l) == 1 && return l2_squeeze(weights(l), outdim(l)) .+ l2_squeeze(bias(l)) + + kernelsize = size(weights(l))[1:N] + weightgroups = reshape(weights(l), kernelsize..., nout(l) ÷ ngroups(l), nin(l)[]) + + wm = l2_squeeze(weightgroups, indim(l)) bm = l2_squeeze(bias(l)) (length(wm) == 1 || length(wm) == length(bm)) && return wm .+ bm # use this to get insight on whether to repeat inner or outer: - # cc = DepthwiseConv(reshape([1 1 1 1;2 2 2 2], 1, 1, 2, 4), [0,0,0,0,1,1,1,1]) - # cc(fill(10, (1,1,4,1))) + # cc = DepthwiseConv(reshape(Float32[1 1 1 1;2 2 2 2], 1, 1, 4, 2), Float32[0,0,0,0,1,1,1,1]) + # cc(fill(10f0, (1,1,4,1))) return repeat(wm, length(bm) ÷ length(wm)) .+ bm end diff --git a/src/select.jl b/src/select.jl index b1cd1dd..6933ed2 100644 --- a/src/select.jl +++ b/src/select.jl @@ -1,7 +1,7 @@ select(pars::AbstractArray{T,N}, elements_per_dim...; newfun = randoutzeroin) where {T, N} = NaiveNASlib.parselect(pars, elements_per_dim...; newfun) select(::Missing, args...;kwargs...) = missing -select(::Flux.Zeros, args...;kwargs...) = Flux.Zeros() +select(s::Number, args...;kwargs...) = s struct WeightParam end struct BiasParam end @@ -21,8 +21,8 @@ neuroninsert(lt::FluxParNorm, t::Val) = norminsert(lt, t) norminsert(::FluxParNorm, ::Union{Val{:β},Val{:μ}}) = (args...) -> 0 norminsert(::FluxParNorm, ::Union{Val{:γ},Val{:σ²}}) = (args...) -> 1 -# Coupling between input and output weights make it difficult to do anything else? 
-neuroninsert(::FluxDepthwiseConv, partype) = (args...) -> 0 +# Coupling between input and output weights when grouped make it difficult to do anything else? +neuroninsert(lt::FluxConvolutional, partype) = ngroups(lt) == 1 ? randoutzeroin : (args...) -> 0 randoutzeroin(T, d, s...) = _randoutzeroin(T,d,s) _randoutzeroin(T, d, s) = 0 diff --git a/src/types.jl b/src/types.jl index 6bdf539..f83765e 100644 --- a/src/types.jl +++ b/src/types.jl @@ -28,13 +28,19 @@ NaiveNASlib.shapetrait(::Flux.GRUCell) = FluxGru() abstract type FluxConvolutional{N} <: FluxParLayer end struct GenericFluxConvolutional{N} <: FluxConvolutional{N} end -struct FluxConv{N} <: FluxConvolutional{N} end -struct FluxConvTranspose{N} <: FluxConvolutional{N} end -struct FluxDepthwiseConv{N} <: FluxConvolutional{N} end +# Groups here is an eyesore. Its just to not have to tag a breaking version for Flux 0.13 due +# to some functions needing to tell the number of groups from the layertype alone +struct FluxConv{N} <: FluxConvolutional{N} + groups::Int +end +FluxConv{N}() where N = FluxConv{N}(1) +struct FluxConvTranspose{N} <: FluxConvolutional{N} + groups::Int +end +FluxConvTranspose{N}() where N = FluxConvTranspose{N}(1) struct FluxCrossCor{N} <: FluxConvolutional{N} end -NaiveNASlib.shapetrait(::Conv{N}) where N = FluxConv{N}() -NaiveNASlib.shapetrait(::ConvTranspose{N}) where N = FluxConvTranspose{N}() -NaiveNASlib.shapetrait(::DepthwiseConv{N}) where N = FluxDepthwiseConv{N}() +NaiveNASlib.shapetrait(l::Conv{N}) where N = FluxConv{N}(l.groups) +NaiveNASlib.shapetrait(l::ConvTranspose{N}) where N = FluxConvTranspose{N}(l.groups) NaiveNASlib.shapetrait(::CrossCor{N}) where N = FluxCrossCor{N}() @@ -42,14 +48,14 @@ abstract type FluxTransparentLayer <: FluxLayer end # Invariant layers with parameters, i.e nin == nout always and parameter selection must # be performed abstract type FluxParInvLayer <: FluxTransparentLayer end -struct FluxDiagonal <: FluxParInvLayer end +struct FluxScale <: FluxParInvLayer end struct FluxLayerNorm <: FluxParInvLayer end abstract type FluxParNorm <: FluxParInvLayer end struct FluxBatchNorm <: FluxParNorm end struct FluxInstanceNorm <: FluxParNorm end struct FluxGroupNorm <: FluxParNorm end -NaiveNASlib.shapetrait(::Flux.Diagonal) = FluxDiagonal() +NaiveNASlib.shapetrait(::Flux.Scale) = FluxScale() NaiveNASlib.shapetrait(::LayerNorm) = FluxLayerNorm() NaiveNASlib.shapetrait(::BatchNorm) = FluxBatchNorm() NaiveNASlib.shapetrait(::InstanceNorm) = FluxInstanceNorm() diff --git a/src/util.jl b/src/util.jl index e16f236..0809aa9 100644 --- a/src/util.jl +++ b/src/util.jl @@ -2,16 +2,15 @@ NaiveNASlib.nin(t::FluxLayer, l) = throw(ArgumentError("Not implemented for $t") NaiveNASlib.nout(t::FluxLayer, l) = throw(ArgumentError("Not implemented for $t")) NaiveNASlib.nin(::FluxParLayer, l) = [size(weights(l), indim(l))] -NaiveNASlib.nout(::FluxParLayer, l) = size(weights(l), outdim(l)) -NaiveNASlib.nout(::FluxDepthwiseConv, l) = size(weights(l), outdim(l)) * nin(l)[] - +NaiveNASlib.nin(::FluxConvolutional, l) = [size(weights(l), indim(l)) * ngroups(l)] NaiveNASlib.nin(::FluxParInvLayer, l) = [nout(l)] -NaiveNASlib.nout(::FluxDiagonal, l) = length(weights(l)) +NaiveNASlib.nout(::FluxParLayer, l) = size(weights(l), outdim(l)) + +NaiveNASlib.nout(::FluxScale, l) = length(weights(l)) NaiveNASlib.nout(::FluxParInvLayer, l::LayerNorm) = nout(l.diag) NaiveNASlib.nout(::FluxParNorm, l) = l.chs - NaiveNASlib.nout(::FluxRecurrent, l) = div(size(weights(l), outdim(l)), outscale(l)) outscale(l) = outscale(layertype(l)) 
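# -- Illustration only, not part of the diff above or below -------------------------
# A minimal sketch of why the new `NaiveNASlib.nin` method for FluxConvolutional
# multiplies by `ngroups`: in Flux 0.13 a grouped Conv stores its weight as
# (kernel..., nin ÷ groups, nout), so nin can no longer be read straight off the
# weight's input dimension. Assumes Flux 0.13; `c` is just a throwaway example layer.
using Flux

c = Conv((3, 3), 4 => 8; groups=2)
size(c.weight)                    # (3, 3, 2, 8): the input dim holds nin ÷ groups
size(c.weight, 3) * c.groups      # 4 == nin, which is what the new nin method returns
size(c.weight, 4)                 # 8 == nout, unchanged from before
# ------------------------------------------------------------------------------------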
@@ -34,10 +33,10 @@ outdim(::Flux2D) = 1 actdim(::Flux2D) = 1 actrank(::Flux2D) = 1 -indim(::FluxDiagonal) = 1 -outdim(::FluxDiagonal) = 1 -actdim(::FluxDiagonal) = 1 -actrank(::FluxDiagonal) = 1 +indim(::FluxScale) = 1 +outdim(::FluxScale) = 1 +actdim(::FluxScale) = 1 +actrank(::FluxScale) = 1 indim(::FluxRecurrent) = 2 outdim(::FluxRecurrent) = 1 @@ -50,7 +49,7 @@ actdim(::FluxConvolutional{N}) where N = 1+N actrank(::FluxConvolutional{N}) where N = 1+N indim(::Union{FluxConv{N}, FluxCrossCor{N}}) where N = 1+N outdim(::Union{FluxConv{N}, FluxCrossCor{N}}) where N = 2+N -# Note: Absence of bias mean that bias is of type Flux.Zeros which mostly behaves like a normal array, mostly... +# Note: Absence of bias mean that bias is a Bool (false), so beware! weights(l) = weights(layertype(l), l) bias(l) = bias(layertype(l), l) @@ -60,8 +59,8 @@ bias(::FluxDense, l) = l.bias weights(::FluxConvolutional, l) = l.weight bias(::FluxConvolutional, l) = l.bias -weights(::FluxDiagonal, l) = l.α -bias(::FluxDiagonal, l) = l.β +weights(::FluxScale, l) = l.scale +bias(::FluxScale, l) = l.bias weights(lt::FluxRecurrent, l::Flux.Recur) = weights(lt, l.cell) bias(lt::FluxRecurrent, l::Flux.Recur) = bias(lt, l.cell) @@ -80,3 +79,10 @@ hiddenstate(::FluxLstm, cell::Flux.LSTMCell) = [h for h in cell.state0] state(l) = state(layertype(l), l) state(::FluxRecurrent, l) = l.state state(::FluxLstm, l) = [h for h in l.state] + +ngroups(l) = ngroups(layertype(l), l) +ngroups(lt, l) = 1 +ngroups(lt::FluxConvolutional, l) = ngroups(lt) +ngroups(::FluxConvolutional) = 1 +ngroups(lt::FluxConv) = lt.groups +ngroups(lt::FluxConvTranspose) = lt.groups diff --git a/src/vertex.jl b/src/vertex.jl index 7400f6c..380294e 100644 --- a/src/vertex.jl +++ b/src/vertex.jl @@ -103,7 +103,7 @@ layertype(l::LayerTypeWrapper) = l.t Trait for computations for which a change in output size results in a change in input size but which is not fully `SizeTransparent`. -Example of this is DepthWiseConv where output size must be an integer multiple of the input size. +Example of this is grouped convolutions where output size must be an integer multiple of the input size. Does not create any constraints or objectives, only signals that vertices after a `SizeNinNoutConnected` might need to change size if the size of the `SizeNinNoutConnected` vertex changes. @@ -147,7 +147,7 @@ fluxvertex(name::AbstractString, l, in::AbstractVertex; layerfun=LazyMutable, tr fluxvertex(::FluxParLayer, l, in::AbstractVertex, layerfun, traitfun) = absorbvertex(layerfun(MutableLayer(l)), in, traitdecoration = traitfun) -fluxvertex(::FluxDepthwiseConv, l, in::AbstractVertex, layerfun, traitfun) = absorbvertex(layerfun(MutableLayer(l)), in; traitdecoration=traitfun ∘ SizeNinNoutConnected) +fluxvertex(::FluxConvolutional, l, in::AbstractVertex, layerfun, traitfun) = absorbvertex(layerfun(MutableLayer(l)), in; traitdecoration= ngroups(l) == 1 ? 
traitfun : traitfun ∘ SizeNinNoutConnected) fluxvertex(::FluxParInvLayer, l, in::AbstractVertex, layerfun, traitfun) = invariantvertex(layerfun(MutableLayer(l)), in, traitdecoration=traitfun ∘ FixedSizeTrait) diff --git a/test/mutable.jl b/test/mutable.jl index 0ee64b2..abb618c 100644 --- a/test/mutable.jl +++ b/test/mutable.jl @@ -50,8 +50,8 @@ end @testset "No bias" begin - m = MutableLayer(Dense(rand(3,2), Flux.Zeros())) - @test bias(layer(m)) == Flux.Zeros() + m = MutableLayer(Dense(rand(3,2), false)) + @test bias(layer(m)) == false @test nin(m) == [2] @test nout(m) == 3 @@ -59,7 +59,7 @@ inds = [2,3] Wexp = weights(layer(m))[inds, :] NaiveNASlib.Δsize!(m,_nins(m), inds) - assertlayer(layer(m), Wexp, Flux.Zeros()) + assertlayer(layer(m), Wexp, false) end end @testset "Convolutional layers" begin @@ -120,8 +120,8 @@ end @testset "No bias" begin - m = MutableLayer(Conv(Flux.convfilter((2,3), 4=>5), Flux.Zeros())) - @test bias(layer(m)) == Flux.Zeros() + m = MutableLayer(Conv(Flux.convfilter((2,3), 4=>5), false)) + @test bias(layer(m)) == false @test nin(m) == [4] @test nout(m) == 5 @@ -129,7 +129,7 @@ inds = [2,3] Wexp = weights(layer(m))[:,:,:,inds] NaiveNASlib.Δsize!(m, _nins(m), inds) - assertlayer(layer(m), Wexp, Flux.Zeros()) + assertlayer(layer(m), Wexp, false) end end @@ -162,7 +162,8 @@ wins = [1, 3] wouts = [1, 2, 5, 6] outputs = mapreduce(i -> wouts .+ (i-1) .* 6, vcat, wins) - Wexp, bexp = weights(m.layer)[:,:,wouts,wins], bias(m.layer)[outputs] + Wexp = reshape(reshape(weights(m.layer), 2, 2, 6, 3)[:,:,wouts,wins], 2, 2, 1, :) + bexp = bias(m.layer)[outputs] NaiveNASlib.Δsize!(m, [wins], outputs) assertlayer(m.layer, Wexp, bexp) @test size(m(ones(Float32, 3,3,2,2)))[3:4] == (8, 2) @@ -497,7 +498,8 @@ wins = [1, 3] wouts = [1, 2, 5, 6] outs = mapreduce(i -> wouts .+ (i-1) .* 6, vcat, wins) - Wexp, bexp = weights(layer(m))[:,:,wouts,wins], bias(layer(m))[outs] + Wexp = reshape(reshape(weights(layer(m)), 2, 2, 6, 3)[:,:,wouts,wins], 2, 2, 1, :) + bexp = bias(layer(m))[outs] NaiveNASlib.Δsize!(m, [wins], outs) @test size(m(ones(Float32, 3,3,2,2)))[3:4] == (8, 2) diff --git a/test/neuronutility.jl b/test/neuronutility.jl index 1bacc43..d3b5afe 100644 --- a/test/neuronutility.jl +++ b/test/neuronutility.jl @@ -50,7 +50,7 @@ end @testset "Neuron utility Dense default no bias" begin - l = ml(Dense(ones(5, 3), Flux.Zeros())) + l = ml(Dense(ones(5, 3), false)) @test size(neuronutility(l)) == (5,) @test neuronutility(l) ≈ neuronutility_safe(l) end diff --git a/test/runtests.jl b/test/runtests.jl index 13db396..dda280b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,11 +3,8 @@ using NaiveNASlib.Advanced, NaiveNASlib.Extend function assertlayer(l, Wexp, bexp) @test size(Wexp) == size(weights(l)) - if bexp isa Flux.Zeros - @test bias(l) isa Flux.Zeros - else - @test size(bexp) == size(bias(l)) - end + @test size(bexp) == size(bias(l)) + @test Wexp == weights(l) @test bexp == bias(l) end diff --git a/test/util.jl b/test/util.jl index 2d5b80f..c8e4a0f 100644 --- a/test/util.jl +++ b/test/util.jl @@ -72,4 +72,12 @@ @test_throws ArgumentError indim(BogusLayer()) @test_throws ArgumentError outdim(BogusLayer()) end + + @testset "ngroups" begin + import NaiveNASflux: ngroups + + @test ngroups(DepthwiseConv((2,), 3 => 9)) == ngroups(Conv((2,), 3 => 9; groups=3)) == ngroups(ConvTranspose((2,), 3 => 9; groups=3)) == 3 + @test ngroups(Conv((3,3), 10 => 30; groups=5)) == ngroups(ConvTranspose((3,3), 10 => 30; groups=5)) == 5 + @test ngroups(Conv((3,3), 10 => 30; groups=2)) == 
ngroups(ConvTranspose((3,3), 10 => 30; groups=2)) == 2 + end end diff --git a/test/vertex.jl b/test/vertex.jl index ec1db34..3e959b5 100644 --- a/test/vertex.jl +++ b/test/vertex.jl @@ -154,7 +154,7 @@ end # just to check that I have understood the wiring of the weight @testset "4 inputs times 2" begin inpt = inputvertex("in", 4, FluxConv{2}()) - dc = fluxvertex("dc", DepthwiseConv(reshape(Float32[10 10 10 10;20 20 20 20], 1, 1, 2, 4), Float32[0,0,0,0,1,1,1,1]), inpt) + dc = fluxvertex("dc", DepthwiseConv(reshape(Float32[10 10 10 10;20 20 20 20], 1, 1, 4, 2), Float32[0,0,0,0,1,1,1,1]), inpt) @test neuronutility(dc) == [20, 40, 20, 40, 21, 41, 21, 41] @test reshape(dc(fill(1f0, (1,1,4,1))), :) == [10, 20, 10, 20, 11, 21, 11, 21] @test Δnout!( dc => -4) @@ -168,7 +168,7 @@ end @testset "2 inputs times 3" begin inpt = inputvertex("in", 2, FluxConv{2}()) - dc = fluxvertex("dc", DepthwiseConv(reshape(Float32[10 10;20 20;30 30], 1, 1, 3, 2), Float32[0,0,1,1,2,2]), inpt) + dc = fluxvertex("dc", DepthwiseConv(reshape(Float32[10 10;20 20;30 30], 1, 1, 2, 3), Float32[0,0,1,1,2,2]), inpt) @test reshape(dc(fill(1f0, (1,1,2,1))), :) == [10, 20, 31, 11, 22, 32] @test Δnout!(dc => -2) @test lazyouts(dc) == [2,3,5,6] @@ -181,7 +181,7 @@ end @testset "1 input times 5" begin inpt = inputvertex("in", 1, FluxConv{2}()) - dc = fluxvertex("dc", DepthwiseConv(reshape(Float32.(10:10:50), 1, 1, 5, 1), Float32.(1:5)), inpt) + dc = fluxvertex("dc", DepthwiseConv(reshape(Float32.(10:10:50), 1, 1, 1, 5), Float32.(1:5)), inpt) @test reshape(dc(fill(1f0, (1,1,1,1))), :) == [11, 22, 33, 44, 55] @test Δnout!(dc=>-2) @test lazyouts(dc) == 3:5 @@ -194,7 +194,7 @@ end @testset "3 inputs times 7" begin inpt = inputvertex("in", 3, FluxConv{2}()) - dc = fluxvertex("dc", DepthwiseConv(reshape(repeat(Float32.(10:10:70), 3), 1,1,7,3), Float32.(1:21)), inpt) + dc = fluxvertex("dc", DepthwiseConv(reshape(repeat(Float32.(10:10:70), 3), 1,1,3,7), Float32.(1:21)), inpt) @test reshape(dc(fill(10f0, (1,1,3,1))), :) == repeat(100:100:700, 3) .+ (1:21) @test Δnout!(dc => -9) do v v == dc || return 1 @@ -270,9 +270,9 @@ end # Test that we actually succeeded in making a valid model y1 = dc1(ones(Float32, 3,3, nout(inpt), 2)) - @test size(y1, outdim(dc1)) == nout(dc1) + @test size(y1)[end-1] == nout(dc1) y2 = dc2(y1) - @test size(y2, outdim(dc2)) == nout(dc2) + @test size(y2)[end-1] == nout(dc2) end @testset "DepthwiseConv groupsize 3 into groupsize 5" begin @@ -307,11 +307,11 @@ end # Test that we actually succeeded in making a valid model y1 = dc1(ones(Float32,5,5, nout(inpt), 2)) - @test size(y1, outdim(dc1)) == nout(dc1) + @test size(y1)[end-1] == nout(dc1) y2 = dc2(y1) - @test size(y2, outdim(dc2)) == nout(dc2) + @test size(y2)[end-1] == nout(dc2) y3 = dc3(y2) - @test size(y3, outdim(dc3)) == nout(dc3) + @test size(y3)[end-1] == nout(dc3) end @testset "Depthwise conv change input size from Conv" begin From 39de482dae2917819fd9eaa62cbb7e0d34160ffd Mon Sep 17 00:00:00 2001 From: DrChainsaw Date: Sun, 10 Apr 2022 15:06:40 +0200 Subject: [PATCH 2/4] Fix doctest errors Rename DepthwiseConv strategies to GroupedConv strategies Flux.Diagonal => Flux.Scale in tests --- src/constraints.jl | 68 ++++++++++++++++++++++------------------------ src/vertex.jl | 6 ++-- test/mutable.jl | 4 +-- test/util.jl | 4 +-- test/vertex.jl | 16 +++++------ 5 files changed, 48 insertions(+), 50 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index ab161f3..e3211d8 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -1,74 +1,72 @@ """ - 
DepthwiseConvAllowNinChangeStrategy(newoutputsmax::Integer, multipliersmax::Integer, base, [fallback]) - DepthwiseConvAllowNinChangeStrategy(allowed_new_outgroups::AbstractVector{<:Integer}, allowed_multipliers::AbstractVector{<:Integer}, base, [fallback]) + GroupedConvAllowNinChangeStrategy(newoutputsmax::Integer, multipliersmax::Integer, base, [fallback]) + GroupedConvAllowNinChangeStrategy(allowed_new_outgroups::AbstractVector{<:Integer}, allowed_multipliers::AbstractVector{<:Integer}, base, [fallback]) -`DecoratingJuMPΔSizeStrategy` which allows both nin and nout of `DepthwiseConv` layers to change independently. +`DecoratingJuMPΔSizeStrategy` which allows both nin and nout of grouped `Conv` layers (i.e `Conv` with `groups` != 1) to change independently. -Might cause optimization to take very long time so use with care! Use [`DepthwiseConvSimpleΔSizeStrategy`](@ref) -if `DepthwiseConvAllowNinChangeStrategy` takes too long. +Might cause optimization to take very long time so use with care! Use [`GroupedConvSimpleΔSizeStrategy`](@ref) +if `GroupedConvAllowNinChangeStrategy` takes too long. The elements of `allowed_new_outgroups` determine how many extra elements in the output dimension of the weight -shall be tried for each existing output element. For example, for a `DepthwiseConv((k1,k2), nin=>nout))` there -are `nout / nin` elements in the output dimension. With `allowed_new_outgroups = 0:3` it is allowed to insert -0, 1, 2 or 3 new elements in the output dimension between each already existing element (so with `nout / nin` -elements the maximum increase is `3 * nout / nin`). +shall be tried for each existing output element. For example, for a `Conv((k1,k2), nin=>nout; groups=nin))` one +must insert integer multiples of `nout / nin` elements at the time. With `nin/nout = k` and `allowed_new_outgroups = 0:3` it is allowed to insert 0, `k`, `2k` or `3k` new elements in the output dimension between each already existing element. The elements of `allowed_multipliers` determine the total number of allowed output elements, i.e the allowed ratios of `nout / nin`. If `fallback` is not provided, it will be derived from `base`. """ -struct DepthwiseConvAllowNinChangeStrategy{S,F} <: DecoratingJuMPΔSizeStrategy +struct GroupedConvAllowNinChangeStrategy{S,F} <: DecoratingJuMPΔSizeStrategy allowed_new_outgroups::Vector{Int} allowed_multipliers::Vector{Int} base::S fallback::F end -DepthwiseConvAllowNinChangeStrategy(newoutputsmax::Integer, multipliersmax::Integer,base,fb...) = DepthwiseConvAllowNinChangeStrategy(0:newoutputsmax, 1:multipliersmax, base, fb...) +GroupedConvAllowNinChangeStrategy(newoutputsmax::Integer, multipliersmax::Integer,base,fb...) = GroupedConvAllowNinChangeStrategy(0:newoutputsmax, 1:multipliersmax, base, fb...) 
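# -- Illustration only, not part of the diff -----------------------------------------
# A rough usage sketch of the renamed strategy, mirroring the updated tests later in
# this patch series. Assumes Flux 0.13 and the renames in this commit; `inpt` and `dc`
# are throwaway examples, with FluxConv{2}() used the same way as in the tests.
using NaiveNASflux, Flux
import NaiveNASflux: GroupedConvAllowNinChangeStrategy
import NaiveNASlib: ΔNout

inpt = inputvertex("in", 2, FluxConv{2}())
dc   = fluxvertex("dc", DepthwiseConv((1, 1), 2 => 6), inpt)

# Allow one extra output group per existing one and nout/nin multipliers up to 4,
# while asking for nout(dc) to grow by 2 (so nout goes 6 -> 8 and nin stays 2).
strat = GroupedConvAllowNinChangeStrategy([1], [4], ΔNout(dc => 2))
Δsize!(strat, dc)
# ------------------------------------------------------------------------------------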
-function DepthwiseConvAllowNinChangeStrategy( +function GroupedConvAllowNinChangeStrategy( allowed_new_outgroups::AbstractVector{<:Integer}, allowed_multipliers::AbstractVector{<:Integer}, - base, fb= recurse_fallback(s -> DepthwiseConvAllowNinChangeStrategy(allowed_new_outgroups, allowed_multipliers, s), base)) - return DepthwiseConvAllowNinChangeStrategy(collect(Int, allowed_new_outgroups), collect(Int, allowed_multipliers), base, fb) + base, fb= recurse_fallback(s -> GroupedConvAllowNinChangeStrategy(allowed_new_outgroups, allowed_multipliers, s), base)) + return GroupedConvAllowNinChangeStrategy(collect(Int, allowed_new_outgroups), collect(Int, allowed_multipliers), base, fb) end -NaiveNASlib.base(s::DepthwiseConvAllowNinChangeStrategy) = s.base -NaiveNASlib.fallback(s::DepthwiseConvAllowNinChangeStrategy) = s.fallback +NaiveNASlib.base(s::GroupedConvAllowNinChangeStrategy) = s.base +NaiveNASlib.fallback(s::GroupedConvAllowNinChangeStrategy) = s.fallback -NaiveNASlib.add_participants!(s::DepthwiseConvAllowNinChangeStrategy, vs=AbstractVertex[]) = NaiveNASlib.add_participants!(base(s), vs) +NaiveNASlib.add_participants!(s::GroupedConvAllowNinChangeStrategy, vs=AbstractVertex[]) = NaiveNASlib.add_participants!(base(s), vs) """ - DepthwiseConvSimpleΔSizeStrategy(base, [fallback]) + GroupedConvSimpleΔSizeStrategy(base, [fallback]) -`DecoratingJuMPΔSizeStrategy` which only allows nout of `DepthwiseConv` layers to change. +`DecoratingJuMPΔSizeStrategy` which only allows nout of grouped `Conv` layers (i.e `Conv` with `groups` != 1) to change. -Use if [`DepthwiseConvAllowNinChangeStrategy`](@ref) takes too long to solve. +Use if [`GroupedConvAllowNinChangeStrategy`](@ref) takes too long to solve. The elements of `allowed_multipliers` determine the total number of allowed output elements, i.e the allowed -ratios of `nout / nin`. +ratios of `nout / nin` (where `nin` is fixed). If `fallback` is not provided, it will be derived from `base`. """ -struct DepthwiseConvSimpleΔSizeStrategy{S, F} <: DecoratingJuMPΔSizeStrategy +struct GroupedConvSimpleΔSizeStrategy{S, F} <: DecoratingJuMPΔSizeStrategy allowed_multipliers::Vector{Int} base::S fallback::F end -DepthwiseConvSimpleΔSizeStrategy(maxms::Integer, base, fb...) = DepthwiseConvSimpleΔSizeStrategy(1:maxms, base, fb...) -function DepthwiseConvSimpleΔSizeStrategy(ms::AbstractVector{<:Integer}, base, fb=recurse_fallback(s -> DepthwiseConvSimpleΔSizeStrategy(ms, s), base)) - return DepthwiseConvSimpleΔSizeStrategy(collect(Int, ms), base, fb) +GroupedConvSimpleΔSizeStrategy(maxms::Integer, base, fb...) = GroupedConvSimpleΔSizeStrategy(1:maxms, base, fb...) 
+function GroupedConvSimpleΔSizeStrategy(ms::AbstractVector{<:Integer}, base, fb=recurse_fallback(s -> GroupedConvSimpleΔSizeStrategy(ms, s), base)) + return GroupedConvSimpleΔSizeStrategy(collect(Int, ms), base, fb) end -NaiveNASlib.base(s::DepthwiseConvSimpleΔSizeStrategy) = s.base -NaiveNASlib.fallback(s::DepthwiseConvSimpleΔSizeStrategy) = s.fallback +NaiveNASlib.base(s::GroupedConvSimpleΔSizeStrategy) = s.base +NaiveNASlib.fallback(s::GroupedConvSimpleΔSizeStrategy) = s.fallback -NaiveNASlib.add_participants!(s::DepthwiseConvSimpleΔSizeStrategy, vs=AbstractVertex[]) = NaiveNASlib.add_participants!(base(s), vs) +NaiveNASlib.add_participants!(s::GroupedConvSimpleΔSizeStrategy, vs=AbstractVertex[]) = NaiveNASlib.add_participants!(base(s), vs) recurse_fallback(f, s::AbstractJuMPΔSizeStrategy) = wrap_fallback(f, NaiveNASlib.fallback(s)) @@ -115,8 +113,8 @@ function NaiveNASlib.compconstraint!(::NaiveNASlib.ScalarSize, s::AbstractJuMPΔ end end -allowed_multipliers(s::DepthwiseConvAllowNinChangeStrategy) = s.allowed_multipliers -allowed_multipliers(s::DepthwiseConvSimpleΔSizeStrategy) = s.allowed_multipliers +allowed_multipliers(s::GroupedConvAllowNinChangeStrategy) = s.allowed_multipliers +allowed_multipliers(s::GroupedConvSimpleΔSizeStrategy) = s.allowed_multipliers allowed_multipliers(::AbstractJuMPΔSizeStrategy) = 1:10 @@ -131,10 +129,10 @@ function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::Abstrac ngroups(v) == 1 && return 0 return log2(nout(v)) # Very roughly determined... end - return NaiveNASlib.compconstraint!(case, DepthwiseConvSimpleΔSizeStrategy(10, s, NaiveNASlib.DefaultJuMPΔSizeStrategy()), t, data) + return NaiveNASlib.compconstraint!(case, GroupedConvSimpleΔSizeStrategy(10, s, NaiveNASlib.DefaultJuMPΔSizeStrategy()), t, data) end # The number of allowed multipliers can probably be better tuned, perhaps based on current size. - return NaiveNASlib.compconstraint!(case, DepthwiseConvAllowNinChangeStrategy(10, 10, s, NaiveNASlib.DefaultJuMPΔSizeStrategy()), t, data) + return NaiveNASlib.compconstraint!(case, GroupedConvAllowNinChangeStrategy(10, 10, s, NaiveNASlib.DefaultJuMPΔSizeStrategy()), t, data) #= For benchmarking: using NaiveNASflux, Flux, NaiveNASlib.Advanced @@ -156,7 +154,7 @@ function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::Abstrac =# end -function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::DepthwiseConvSimpleΔSizeStrategy, t::FluxConvolutional, data) +function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::GroupedConvSimpleΔSizeStrategy, t::FluxConvolutional, data) model = data.model v = data.vertex select = data.outselectvars[v] @@ -186,7 +184,7 @@ function NaiveNASlib.compconstraint!(::NaiveNASlib.NeuronIndices, s::DepthwiseCo NaiveNASlib.compconstraint!(NaiveNASlib.ScalarSize(), s, t, data, allowed_multipliers(s)) end -function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::DepthwiseConvAllowNinChangeStrategy, t::FluxConvolutional, data) +function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::GroupedConvAllowNinChangeStrategy, t::FluxConvolutional, data) model = data.model v = data.vertex select = data.outselectvars[v] @@ -206,7 +204,7 @@ function NaiveNASlib.compconstraint!(case::NaiveNASlib.NeuronIndices, s::Depthwi # ins = filter(vin -> vin in keys(data.noutdict), inputs(v)) # If inputs to v are not part of problem we have to keep nin(v) fixed! 
- isempty(ins) && return NaiveNASlib.compconstraint!(case, DepthwiseConvSimpleΔSizeStrategy(allowed_multipliers(s), base(s)), t, data) + isempty(ins) && return NaiveNASlib.compconstraint!(case, GroupedConvSimpleΔSizeStrategy(allowed_multipliers(s), base(s)), t, data) # TODO: Check if input is immutable and do simple strat then too? inselect = data.outselectvars[ins[]] ininsert = data.outinsertvars[ins[]] diff --git a/src/vertex.jl b/src/vertex.jl index 380294e..4ba39a5 100644 --- a/src/vertex.jl +++ b/src/vertex.jl @@ -204,7 +204,7 @@ Return the computation wrapped inside `v` and inside any mutable wrappers. julia> using NaiveNASflux, Flux julia> layer(fluxvertex(Dense(2,3), inputvertex("in", 2))) -Dense(2, 3) # 9 parameters +Dense(2 => 3) # 9 parameters ``` """ layer(v::AbstractVertex) = layer(base(v)) @@ -235,12 +235,12 @@ This typically means create a new layer with the given values and set the wrappe julia> v = fluxvertex(Dense(3, 4, relu), inputvertex("in", 3)); julia> layer(v) -Dense(3, 4, relu) # 16 parameters +Dense(3 => 4, relu) # 16 parameters julia> NaiveNASflux.setlayer!(v, (;σ=tanh)); julia> layer(v) -Dense(3, 4, tanh) # 16 parameters +Dense(3 => 4, tanh) # 16 parameters ``` """ function setlayer!(x, propval) end diff --git a/test/mutable.jl b/test/mutable.jl index abb618c..5fd2581 100644 --- a/test/mutable.jl +++ b/test/mutable.jl @@ -198,8 +198,8 @@ end end - @testset "Diagonal MutableLayer" begin - m = MutableLayer(Flux.Diagonal(4)) + @testset "Scale MutableLayer" begin + m = MutableLayer(Flux.Scale(4)) @test nin(m) == [nout(m)] == [4] diff --git a/test/util.jl b/test/util.jl index c8e4a0f..adfdfcb 100644 --- a/test/util.jl +++ b/test/util.jl @@ -35,7 +35,7 @@ @test nin(CrossCor((1,2,3), 4=>5)) == [4] @test nout(CrossCor((1,2,3), 4=>5)) == 5 - @test nin(Flux.Diagonal(3)) == [nout(Flux.Diagonal(3))] == [3] + @test nin(Flux.Scale(3)) == [nout(Flux.Scale(3))] == [3] @test nin(LayerNorm(3)) == [nout(LayerNorm(3))] == [3] @test nin(BatchNorm(3)) == [nout(BatchNorm(3))] == [3] @@ -61,7 +61,7 @@ @test actdim(DepthwiseConv((1,2), 3=>6)) == 3 @test actdim(CrossCor((1,2), 3=>6)) == 3 - @test actdim(Flux.Diagonal(1)) == indim(Flux.Diagonal(2)) == outdim(Flux.Diagonal(3)) == 1 + @test actdim(Flux.Scale(1)) == indim(Flux.Scale(2)) == outdim(Flux.Scale(3)) == 1 @test actdim(GenericFluxRecurrent()) == 1 @test actdim(RNN(3,4)) == 1 diff --git a/test/vertex.jl b/test/vertex.jl index 3e959b5..e66af9a 100644 --- a/test/vertex.jl +++ b/test/vertex.jl @@ -212,35 +212,35 @@ end @test reshape(dc(fill(10f0, (1,1,3,1))), :) == [101,303,404,505,0, 0, 108,310,411,512,0 ,0, 115,317,418,519,0, 0] end - @testset "DepthwiseConvAllowNinChangeStrategy" begin - import NaiveNASflux: DepthwiseConvAllowNinChangeStrategy + @testset "GroupedConvAllowNinChangeStrategy" begin + import NaiveNASflux: GroupedConvAllowNinChangeStrategy import NaiveNASlib: ΔNout inpt = inputvertex("in", 2, FluxConv{2}()) dc = fluxvertex("dc", DepthwiseConv((1,1), nout(inpt) => 3*nout(inpt)), inpt) # Get output multiplier == 4 (nout = 4 * nin) by adding one more outgroup (4 = 3 + 1) - okstrat = DepthwiseConvAllowNinChangeStrategy([1], [4], ΔNout(dc => 2)) + okstrat = GroupedConvAllowNinChangeStrategy([1], [4], ΔNout(dc => 2)) @test Δsize!(okstrat, dc) @test nout(dc) == 8 @test nin(dc) == [2] - failstrat = DepthwiseConvAllowNinChangeStrategy([10], [0], ΔNout(dc => 2)) + failstrat = GroupedConvAllowNinChangeStrategy([10], [0], ΔNout(dc => 2)) @test @test_logs (:warn, r"Could not change nout of dc") match_mode=:any Δsize!(failstrat, dc) == 
false end - @testset "DepthwiseConvSimpleΔSizeStrategy" begin - using NaiveNASflux: DepthwiseConvSimpleΔSizeStrategy + @testset "GroupedConvSimpleΔSizeStrategy" begin + using NaiveNASflux: GroupedConvSimpleΔSizeStrategy using NaiveNASlib: ΔNout inpt = inputvertex("in", 2, FluxConv{2}()) dc = fluxvertex("dc", DepthwiseConv((1,1), nout(inpt) => 3*nout(inpt)), inpt) - okstrat = DepthwiseConvSimpleΔSizeStrategy(4, ΔNout(dc => 2)) + okstrat = GroupedConvSimpleΔSizeStrategy(4, ΔNout(dc => 2)) @test Δsize!(okstrat, dc) @test nout(dc) == 8 @test nin(dc) == [2] # We tested complete failure above, so lets make the relaxation work here - failstrat = DepthwiseConvSimpleΔSizeStrategy(5, ΔNout(dc => 3)) + failstrat = GroupedConvSimpleΔSizeStrategy(5, ΔNout(dc => 3)) @test_logs (:warn, r"Could not change nout of dc") Δsize!(failstrat, dc) @test nout(dc) == 10 @test nin(dc) == [2] From 0b85a28b46b316b62cd4220d3b7b19afe1600cd4 Mon Sep 17 00:00:00 2001 From: DrChainsaw Date: Sun, 10 Apr 2022 15:49:07 +0200 Subject: [PATCH 3/4] Remove spaces in doctest --- src/vertex.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vertex.jl b/src/vertex.jl index 4ba39a5..2450209 100644 --- a/src/vertex.jl +++ b/src/vertex.jl @@ -204,7 +204,7 @@ Return the computation wrapped inside `v` and inside any mutable wrappers. julia> using NaiveNASflux, Flux julia> layer(fluxvertex(Dense(2,3), inputvertex("in", 2))) -Dense(2 => 3) # 9 parameters +Dense(2 => 3) # 9 parameters ``` """ layer(v::AbstractVertex) = layer(base(v)) @@ -235,12 +235,12 @@ This typically means create a new layer with the given values and set the wrappe julia> v = fluxvertex(Dense(3, 4, relu), inputvertex("in", 3)); julia> layer(v) -Dense(3 => 4, relu) # 16 parameters +Dense(3 => 4, relu) # 16 parameters julia> NaiveNASflux.setlayer!(v, (;σ=tanh)); julia> layer(v) -Dense(3 => 4, tanh) # 16 parameters +Dense(3 => 4, tanh) # 16 parameters ``` """ function setlayer!(x, propval) end From f572a3d8123fcbb53619c43b04beaa843e916ac1 Mon Sep 17 00:00:00 2001 From: DrChainsaw Date: Sun, 10 Apr 2022 16:37:54 +0200 Subject: [PATCH 4/4] Fix doctest --- src/mutable.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mutable.jl b/src/mutable.jl index 4789876..c00dd2a 100644 --- a/src/mutable.jl +++ b/src/mutable.jl @@ -241,7 +241,7 @@ julia> lazy(ones(Float32, 2, 5)) |> size (3, 5) julia> layer(lazy) -Dense(2, 3, relu) # 9 parameters +Dense(2 => 3, relu) # 9 parameters ``` """ mutable struct LazyMutable <: AbstractMutableComp
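# -- Illustration only, appended after the patch series ------------------------------
# A minimal sketch of the Flux 0.13 behavior these patches adapt to: DepthwiseConv
# is now just Conv with groups == nin, the weight is stored as
# (kernel..., nin ÷ groups, nout), and the pre-0.13 grouping used by the constraint
# and utility code is recovered by reshaping to (kernel..., nout ÷ groups, nin).
# Assumes Flux 0.13; all names are local to the example.
using Flux

dc = DepthwiseConv((1, 1), 3 => 9; bias=false)   # equivalent to Conv((1,1), 3 => 9; groups=3)
size(dc.weight)                                  # (1, 1, 1, 9): (kernel..., nin ÷ groups, nout)
dc.groups                                        # 3
dc.bias                                          # false: an absent bias is a plain Bool now

# Back to the pre-0.13 (kernel..., nout ÷ groups, nin) grouping, as done in
# neuronutility.jl and in the grouped-conv constraints:
weightgroups = reshape(dc.weight, 1, 1, 3, 3)
size(weightgroups)                               # (1, 1, 3, 3)

# The other mechanical rename in these patches: Flux.Diagonal is now Flux.Scale,
# with fields `scale` and `bias`.
s = Flux.Scale(4)
size(s.scale), size(s.bias)                      # ((4,), (4,))
# ------------------------------------------------------------------------------------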