Added mode(vector of categorical values), generalized other mode, and removed @turbo in DenseLayer

- Generalized mode so that it also works on calls like mode(["a","b","a"])
- Removed @turbo, which was causing strange errors when using NeuralNetworkEstimator as an imputer
- Added a test for NeuralNetworkEstimator as an imputer
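
A minimal sketch of the generalized behaviour (assuming BetaML is loaded; the categorical call mirrors the new test in Utils_tests.jl, while the probability-dictionary input is only an illustrative example, not taken from this commit):

using BetaML

# mode on a plain vector of categorical values (the new generic fallback in Processing.jl)
x = ["a","b","b","a","d","b"]
mode(x)        # returns "b", the most frequent label

# mode on a vector of label => score dictionaries, now accepting any Number values
probs = [Dict("a"=>0.6,"b"=>0.4), Dict("a"=>0.2,"b"=>0.8)]
mode(probs)    # returns ["a","b"], the per-record key with the highest score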
sylvaticus committed Jul 17, 2024
1 parent 68e29bf commit 55c1958
Showing 6 changed files with 36 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "BetaML"
uuid = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
authors = ["Antonello Lobianco <[email protected]>"]
-version = "0.12.0"
+version = "0.12.1"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
12 changes: 6 additions & 6 deletions src/Nn/default_layers/DenseLayer.jl
@@ -58,7 +58,7 @@ function _zComp(layer::DenseLayer{TF,DTF,WET},x) where {TF, DTF, WET}
z = zeros(WET,size(w,1))
@inbounds for n in axes(w,1)
zn = zero(eltype(x))
-@turbo for nl in axes(x,1)
+for nl in axes(x,1) # @turbo
zn += w[n,nl] * x[nl]
end
zn += wb[n]
@@ -70,7 +70,7 @@ end
function _zComp!(z,layer::DenseLayer{TF,DTF,WET},x) where {TF, DTF, WET}
@inbounds for n in axes(layer.w,1)
zn = zero(WET)
-@turbo for nl in axes(x,1)
+for nl in axes(x,1) # @turbo
zn += layer.w[n,nl] * x[nl]
end
z[n] += zn
@@ -95,8 +95,8 @@ function backward(layer::DenseLayer{TF,DTF,WET},x,next_gradient) where {TF, DTF,
else
dfz = layer.f'.(z) # using AD
end
-dϵ_dz = @turbo dfz .* next_gradient
-dϵ_dI = @turbo layer.w' * dϵ_dz # @avx
+dϵ_dz = dfz .* next_gradient # @turbo
+dϵ_dI = layer.w' * dϵ_dz # @turbo # @avx
return dϵ_dI
end

@@ -113,8 +113,8 @@ function get_gradient(layer::DenseLayer{TF,DTF,WET},x,next_gradient) where {TF,
else
dfz = layer.f'.(z) # using AD
end
-dϵ_dz = @turbo dfz .* next_gradient
-dϵ_dw = @turbo dϵ_dz * x' # @avx
+dϵ_dz = dfz .* next_gradient # @turbo
+dϵ_dw = dϵ_dz * x' # @turbo # @avx
dϵ_dwb = dϵ_dz
return Learnable((dϵ_dw,dϵ_dwb))
end
11 changes: 9 additions & 2 deletions src/Utils/Processing.jl
Expand Up @@ -1392,7 +1392,7 @@ end
Return the key with highest mode (using rand in case of multimodal values)
"""
-function mode(dict::Dict{T,Float64};rng = Random.GLOBAL_RNG) where {T}
+function mode(dict::Dict{T,T2};rng = Random.GLOBAL_RNG) where {T, T2 <: Number}
mks = [k for (k,v) in dict if v==maximum(values(dict))]
if length(mks) == 1
return mks[1]
@@ -1428,16 +1428,23 @@ Use it to return a unique value from a multiclass classifier returning probabili
- If multiple classes have the highest mode, one is returned at random (use the parameter `rng` to fix the stochasticity)
"""
-function mode(dicts::AbstractArray{Dict{T,Float64}};rng = Random.GLOBAL_RNG) where {T}
+function mode(dicts::AbstractArray{AbstractDict{T,<: Number}};rng = Random.GLOBAL_RNG) where {T}
return mode.(dicts;rng=rng)
end

function mode(vals::AbstractArray{T,1};rng = Random.GLOBAL_RNG) where {T <: AbstractArray{T2,1} where T2 <: Number}
return mode.(vals;rng=rng)
end
function mode(vals::AbstractArray{T,1};rng = Random.GLOBAL_RNG) where {T <: AbstractDict{T2,<: Number} where T2 }
return mode.(vals;rng=rng)
end

function mode(vals::AbstractArray{T,2};rng = Random.GLOBAL_RNG) where {T <: Number}
return [mode(r;rng=rng) for r in eachrow(vals)]
end
function mode(vals::AbstractArray{T,1};rng = Random.GLOBAL_RNG) where {T}
return mode(class_counts_with_labels(vals);rng=rng)
end


"""
15 changes: 15 additions & 0 deletions test/Imputation_tests.jl
@@ -225,6 +225,21 @@ Xfull2 = BetaML.fit!(mod2,X)
# predict(mod2,X)


rng2 = deepcopy(TESTRNG)
X = (vcat([[s*2-rand(rng2)/10 s*0.5*(1+rand(rng2)/10) exp(s)] for s in rand(rng2,600)]...))
X = convert(Matrix{Union{Float64,Missing}},X)
orig = [X[1,1], X[2,3], X[3,2]]
X[1,1] = missing #
X[2,3] = missing #
X[3,2] = missing #
rng2 = deepcopy(TESTRNG)
m = GeneralImputer(estimator=NeuralNetworkEstimator(rng=rng2, batch_size=256, epochs=300, verbosity=NONE), rng=rng2)
x_full = fit!(m,X)
imputed = [x_full[1,1], x_full[2,3], x_full[3,2]]
rme = relative_mean_error(orig,imputed)
@test rme < 0.5



# ------------------------------------------------------------------------------

5 changes: 4 additions & 1 deletion test/Utils_tests.jl
@@ -3,7 +3,7 @@ import Distributions: Normal

#using StableRNGs
#rng = StableRNG(123)
-using BetaML
+#using BetaML
#import BetaML.Utils

TESTRNG = FIXEDRNG # This could change...
@@ -406,6 +406,9 @@ y = [y1,y2]
y = vcat(y1',y2')
mode(y,rng=copy(TESTRNG)) == [4,3]

x = ["a","b","b","a","d","b"]
@test mode(x,rng=copy(TESTRNG)) == "b"

# ==================================
# New test

1 change: 1 addition & 0 deletions test/runtests.jl
@@ -1,4 +1,5 @@
using Test
using BetaML

#using Pkg # seems Julia bug: can't find pkg `Pkg` !!
#Pkg.activate(@__DIR__)
