Add support for UnitNormalization
Dobiasd committed Nov 28, 2023
1 parent 3bfb17f commit 715795b
Showing 10 changed files with 86 additions and 68 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -47,7 +47,7 @@ Would you like to build/train a model using Keras/Python? And would you like to
* `BatchNormalization`, `Dense`, `Flatten`, `Normalization`
* `Dropout`, `AlphaDropout`, `GaussianDropout`, `GaussianNoise`
* `SpatialDropout1D`, `SpatialDropout2D`, `SpatialDropout3D`
* `ActivityRegularization`, `LayerNormalization`
* `ActivityRegularization`, `LayerNormalization`, `UnitNormalization`
* `RandomContrast`, `RandomFlip`, `RandomHeight`
* `RandomRotation`, `RandomTranslation`, `RandomWidth`, `RandomZoom`
* `MaxPooling1D/2D/3D`, `GlobalMaxPooling1D/2D/3D`
@@ -81,7 +81,7 @@ Would you like to build/train a model using Keras/Python? And would you like to
`LSTMCell`, `Masking`, `MultiHeadAttention`,
`RepeatVector`, `RNN`, `SimpleRNN`,
`SimpleRNNCell`, `StackedRNNCells`, `StringLookup`, `TextVectorization`,
`ThresholdedReLU`, `UnitNormalization`, `Upsampling3D`, `temporal` models
`ThresholdedReLU`, `Upsampling3D`, `temporal` models

Usage
-----
13 changes: 11 additions & 2 deletions include/fdeep/import_model.hpp
@@ -52,6 +52,7 @@
#include "fdeep/layers/input_layer.hpp"
#include "fdeep/layers/layer.hpp"
#include "fdeep/layers/layer_normalization_layer.hpp"
#include "fdeep/layers/unit_normalization_layer.hpp"
#include "fdeep/layers/leaky_relu_layer.hpp"
#include "fdeep/layers/embedding_layer.hpp"
#include "fdeep/layers/lstm_layer.hpp"
@@ -553,6 +554,13 @@ inline layer_ptr create_layer_normalization_layer(const get_param_f& get_param,
name, axes, beta, gamma, epsilon);
}

inline layer_ptr create_unit_normalization_layer(const get_param_f&,
const nlohmann::json& data, const std::string& name)
{
const auto axes = create_vector<int>(create_int, data["config"]["axis"]);
return std::make_shared<unit_normalization_layer>(name, axes);
}

inline layer_ptr create_identity_layer(
const get_param_f&, const nlohmann::json&, const std::string& name)
{
@@ -639,7 +647,7 @@ inline layer_ptr create_concatenate_layer(
const get_param_f&, const nlohmann::json& data,
const std::string& name)
{
const std::int32_t keras_axis = data["config"]["axis"];
const int keras_axis = data["config"]["axis"];
return std::make_shared<concatenate_layer>(name, keras_axis);
}

@@ -668,7 +676,7 @@ inline layer_ptr create_dot_layer(
const get_param_f&, const nlohmann::json& data,
const std::string& name)
{
const auto axes = create_vector<std::size_t>(create_size_t, data["config"]["axes"]);
const auto axes = create_vector<int>(create_int, data["config"]["axes"]);
const bool normalize = data["config"]["normalize"];
return std::make_shared<dot_layer>(name, axes, normalize);
}
@@ -1301,6 +1309,7 @@ inline layer_ptr create_layer(const get_param_f& get_param,
{"InputLayer", create_input_layer},
{"BatchNormalization", create_batch_normalization_layer},
{"LayerNormalization", create_layer_normalization_layer},
{"UnitNormalization", create_unit_normalization_layer},
{"Dropout", create_identity_layer},
{"ActivityRegularization", create_identity_layer},
{"AlphaDropout", create_identity_layer},
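Note: the `axis` value that `create_unit_normalization_layer` reads from `data["config"]` is whatever the Keras layer serializes in its config. A quick way to inspect it (an illustrative check, not part of this commit):

```python
from tensorflow.keras.layers import UnitNormalization

# The converted model JSON carries each layer's Keras config;
# create_unit_normalization_layer parses the "axis" entry from it.
print(UnitNormalization(axis=[1, 2]).get_config())
# roughly: {'name': 'unit_normalization', 'trainable': True, 'dtype': 'float32', 'axis': [1, 2]}
```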
2 changes: 1 addition & 1 deletion include/fdeep/layers/additive_attention_layer.hpp
@@ -38,7 +38,7 @@ class additive_attention_layer : public layer
reshape(key, tensor_shape(1, key.shape().width_, key.shape().depth_)))))),
tensor_shape(query.shape().width_, key.shape().width_));
const tensor distribution = softmax(scores);
return {dot_product_tensors(distribution, value, std::vector<std::size_t>({2, 1}), false)};
return {dot_product_tensors(distribution, value, std::vector<int>({2, 1}), false)};
}
float_vec scale_;
};
4 changes: 2 additions & 2 deletions include/fdeep/layers/attention_layer.hpp
@@ -33,7 +33,7 @@ class attention_layer : public layer
const tensor& key = input.size() > 2 ? input[2] : value;
const tensor scores = score_mode_ == "dot" ?
transform_tensor(fplus::multiply_with(scale_),
dot_product_tensors(query, transpose(key), std::vector<std::size_t>({2, 1}), false))
dot_product_tensors(query, transpose(key), std::vector<int>({2, 1}), false))
:
// https://github.com/keras-team/keras/blob/v2.13.1/keras/layers/attention/attention.py
transform_tensor(fplus::multiply_with(concat_score_weight_),
@@ -46,7 +46,7 @@
reshape(key, tensor_shape(1, key.shape().width_, key.shape().depth_)))))),
tensor_shape(query.shape().width_, key.shape().width_)));
const tensor distribution = softmax(scores);
return {dot_product_tensors(distribution, value, std::vector<std::size_t>({2, 1}), false)};
return {dot_product_tensors(distribution, value, std::vector<int>({2, 1}), false)};
}
std::string score_mode_;
float_type scale_;
18 changes: 2 additions & 16 deletions include/fdeep/layers/concatenate_layer.hpp
@@ -16,30 +16,16 @@ namespace fdeep { namespace internal
class concatenate_layer : public layer
{
public:
explicit concatenate_layer(const std::string& name, std::int32_t axis)
explicit concatenate_layer(const std::string& name, int axis)
: layer(name), axis_(axis)
{
}
protected:
std::int32_t keras_axis_to_fdeep_axis(std::int32_t keras_axis)
{
if (keras_axis == 1)
{
return 1;
}
else if (keras_axis == 2)
{
return 2;
}
assertion(keras_axis == -1 || keras_axis == 3, "Invalid Keras axis (" + std::to_string(keras_axis) +
") for concatenate layer.");
return 0;
}
tensors apply_impl(const tensors& input) const override
{
return {concatenate_tensors(input, axis_)};
}
std::int32_t axis_;
int axis_;
};

} } // namespace fdeep, namespace internal
4 changes: 2 additions & 2 deletions include/fdeep/layers/dot_layer.hpp
@@ -16,11 +16,11 @@ namespace fdeep { namespace internal
class dot_layer : public layer
{
public:
explicit dot_layer(const std::string& name, const std::vector<std::size_t>& axes, bool normalize)
explicit dot_layer(const std::string& name, const std::vector<int>& axes, bool normalize)
: layer(name), axes_(axes), normalize_(normalize)
{
}
std::vector<std::size_t> axes_;
std::vector<int> axes_;
bool normalize_;
protected:
tensors apply_impl(const tensors& input) const override
1 change: 0 additions & 1 deletion include/fdeep/layers/layer_normalization_layer.hpp
@@ -7,7 +7,6 @@
#pragma once

#include "fdeep/layers/layer.hpp"
#include "fdeep/layers/batch_normalization_layer.hpp"

#include <string>

35 changes: 35 additions & 0 deletions include/fdeep/layers/unit_normalization_layer.hpp
@@ -0,0 +1,35 @@
// Copyright 2016, Tobias Hermann.
// https://github.com/Dobiasd/frugally-deep
// Distributed under the MIT License.
// (See accompanying LICENSE file or at
// https://opensource.org/licenses/MIT)

#pragma once

#include "fdeep/layers/layer.hpp"

#include <string>

namespace fdeep { namespace internal
{

class unit_normalization_layer : public layer
{
public:
explicit unit_normalization_layer(const std::string& name,
std::vector<int> axes)
: layer(name),
axes_(axes)
{
}
protected:
std::vector<int> axes_;

tensors apply_impl(const tensors& inputs) const override
{
const auto& input = single_tensor_from_tensors(inputs);
return {l2_normalize(input, axes_)};
}
};

} } // namespace fdeep, namespace internal
58 changes: 17 additions & 41 deletions include/fdeep/tensor.hpp
@@ -922,46 +922,22 @@ inline tensor reshape(const tensor& t, const tensor_shape& target_shape)
return tensor(target_shape, t.as_vector());
}

inline tensor l2_normalize(const tensor& t, std::size_t axis)
{
axis = axis + 5 - t.rank();
const auto reduced_dims = fplus::replace_elem_at_idx(axis - 1, 1,
tensor_shape_with_changed_rank(t.shape(), 5).dimensions());
tensor sum = tensor(create_tensor_shape_from_dims(reduced_dims), float_type(0));
const auto get_sum_ref = [&sum, axis](
std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z) -> float_type&
{
assertion(axis >= 1 && axis <= 5, "invalid axis");
if (axis == 1)
return sum.get_ref_ignore_rank(tensor_pos(0, dim4, y, x, z));
else if (axis == 2)
return sum.get_ref_ignore_rank(tensor_pos(dim5, 0, y, x, z));
else if (axis == 3)
return sum.get_ref_ignore_rank(tensor_pos(dim5, dim4, 0, x, z));
else if (axis == 4)
return sum.get_ref_ignore_rank(tensor_pos(dim5, dim4, y, 0, z));
return sum.get_ref_ignore_rank(tensor_pos(dim5, dim4, y, x, 0));
};
loop_over_all_dims(t.shape(), [&](
std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z)
{
get_sum_ref(dim5, dim4, y, x, z) +=
fplus::square(t.get_ignore_rank(tensor_pos(dim5, dim4, y, x, z)));
});
auto out = tensor(t.shape(), float_type(0));
loop_over_all_dims(t.shape(), [&](
std::size_t dim5, std::size_t dim4, std::size_t y, std::size_t x, std::size_t z)
{
out.get_ref_ignore_rank(tensor_pos(dim5, dim4, y, x, z)) =
t.get_ignore_rank(tensor_pos(dim5, dim4, y, x, z)) /
std::sqrt(get_sum_ref(dim5, dim4, y, x, z));
});
return out;
inline tensor l2_normalize(const tensor& t, const std::vector<int>& axes)
{
const float_type epsilon = std::numeric_limits<float_type>::epsilon();
// https://github.com/tensorflow/tensorflow/blob/v2.14.0/tensorflow/python/ops/nn_impl.py#L705-L707
const auto square_sum = reduce(add_tensors, transform_tensor(fplus::square<float_type>, t), axes);
const auto x_inv_norm = transform_tensor(
[](float_type v) {return static_cast<float_type>(1) / std::sqrt(v);},
transform_tensor(
[epsilon](float_type x) {return std::max(x, epsilon);},
square_sum));
return mult_tensors(t, x_inv_norm);
}

inline tensor dot_product_tensors(
const tensor& a, const tensor& b,
const std::vector<std::size_t>& axes_raw,
const std::vector<int>& axes_raw,
bool normalize)
{
/*
@@ -981,7 +957,7 @@
*/

assertion(axes_raw.size() == 1 || axes_raw.size() == 2, "axes must have size 1 or 2");
const auto axes = axes_raw.size() == 2 ? axes_raw : std::vector<std::size_t>({axes_raw.front(), axes_raw.front()});
const auto axes = axes_raw.size() == 2 ? axes_raw : std::vector<int>({axes_raw.front(), axes_raw.front()});

const auto axis_a = axes[0];
const auto axis_b = axes[1];
@@ -994,11 +970,11 @@
const auto permute_target_a = fplus::prepend_elem(axis_a, permute_target_a_suffix);
const auto permute_target_b = fplus::append_elem(axis_b, permute_target_b_prefix);

const auto a_permuted = permute_tensor(normalize ? l2_normalize(a, axis_a) : a, permute_target_a);
const auto b_permuted = permute_tensor(normalize ? l2_normalize(b, axis_b) : b, permute_target_b);
const auto a_permuted = permute_tensor(normalize ? l2_normalize(a, {axis_a}) : a, permute_target_a);
const auto b_permuted = permute_tensor(normalize ? l2_normalize(b, {axis_b}) : b, permute_target_b);

const auto a_axis_dim_size = a.shape().dimensions()[axis_a - 1];
const auto b_axis_dim_size = b.shape().dimensions()[axis_b - 1];
const auto a_axis_dim_size = a.shape().dimensions()[static_cast<std::size_t>(axis_a - 1)];
const auto b_axis_dim_size = b.shape().dimensions()[static_cast<std::size_t>(axis_b - 1)];

const auto a_remaining_dim_sizes = fplus::elems_at_idxs(
fplus::numbers(std::size_t(1), a.rank()), a_permuted.shape().dimensions());
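The rewritten `l2_normalize` above follows the linked TensorFlow reference: square-sum over the chosen axes, clamp with epsilon, then scale by the inverse square root. A rough NumPy sketch of the same computation (illustrative only, not code from this commit):

```python
import numpy as np

def l2_normalize(x, axes, epsilon=np.finfo(np.float32).eps):
    # Sum of squares over the normalization axes, kept as broadcastable dims.
    square_sum = np.sum(np.square(x), axis=tuple(axes), keepdims=True)
    # Clamp with epsilon before the square root, then scale by the inverse norm.
    x_inv_norm = 1.0 / np.sqrt(np.maximum(square_sum, epsilon))
    return x * x_inv_norm

x = np.array([[3.0, 4.0], [6.0, 8.0]], dtype=np.float32)
print(l2_normalize(x, axes=[1]))  # each row ends up with unit L2 norm: [[0.6, 0.8], [0.6, 0.8]]
```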
15 changes: 14 additions & 1 deletion keras_export/generate_test_models.py
Expand Up @@ -8,7 +8,7 @@
from tensorflow.keras.layers import ActivityRegularization
from tensorflow.keras.layers import AdditiveAttention
from tensorflow.keras.layers import Attention
from tensorflow.keras.layers import BatchNormalization, Concatenate, LayerNormalization
from tensorflow.keras.layers import BatchNormalization, Concatenate, LayerNormalization, UnitNormalization
from tensorflow.keras.layers import Bidirectional, TimeDistributed
from tensorflow.keras.layers import CategoryEncoding
from tensorflow.keras.layers import Conv1D, ZeroPadding1D, Cropping1D
@@ -303,6 +303,19 @@ def get_test_model_exhaustive():
outputs.append(LayerNormalization(axis=[1, 2])(inputs[0]))
outputs.append(LayerNormalization(axis=[2, 3, 5])(inputs[0]))

outputs.append(UnitNormalization()(inputs[11]))
outputs.append(UnitNormalization()(inputs[10]))
outputs.append(UnitNormalization()(inputs[26]))
outputs.append(UnitNormalization()(inputs[24]))
outputs.append(UnitNormalization()(inputs[0]))
outputs.append(UnitNormalization(axis=1)(inputs[0]))
outputs.append(UnitNormalization(axis=2)(inputs[0]))
outputs.append(UnitNormalization(axis=3)(inputs[0]))
outputs.append(UnitNormalization(axis=4)(inputs[0]))
outputs.append(UnitNormalization(axis=5)(inputs[0]))
outputs.append(UnitNormalization(axis=[1, 2])(inputs[0]))
outputs.append(UnitNormalization(axis=[2, 3, 5])(inputs[0]))

outputs.append(Dropout(0.5)(inputs[4]))
outputs.append(ActivityRegularization(0.3, 0.4)(inputs[4]))

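With this commit, a Keras model containing `UnitNormalization` should go through the usual frugally-deep workflow. A minimal sketch (file names are illustrative; the conversion step assumes the existing `keras_export/convert_model.py` script):

```python
from tensorflow.keras.layers import Dense, Input, UnitNormalization
from tensorflow.keras.models import Model

# Tiny functional model using the newly supported layer.
inp = Input(shape=(4,))
out = UnitNormalization()(Dense(8)(inp))
model = Model(inp, out)
model.compile(loss="mse", optimizer="adam")
model.save("unit_norm_model.h5")

# Convert for frugally-deep (illustrative invocation):
#   python3 keras_export/convert_model.py unit_norm_model.h5 unit_norm_model.json
# Then load it in C++ via fdeep::load_model("unit_norm_model.json") and call .predict(...).
```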
