From 70323c9d36f6ffaeabb17408c8d3b5e2bf15e19f Mon Sep 17 00:00:00 2001 From: "Wang, Harry" Date: Thu, 20 Feb 2025 14:17:01 -0800 Subject: [PATCH 01/18] Init: add examples --- experiments/.gitignore | 1 + experiments/conv1d_gru_example.ipynb | 394 ++++++++++ experiments/dense_streaming/CMakeLists.txt | 338 +++++++++ experiments/dense_streaming/hls4ml_config.yml | 46 ++ experiments/dense_streaming/keras_model.keras | Bin 0 -> 16672 bytes .../dense_streaming/src/exception_handler.hpp | 21 + .../dense_streaming/src/firmware/defines.h | 33 + .../src/firmware/myproject.cpp | 35 + .../dense_streaming/src/firmware/myproject.h | 33 + .../nnet_utils/activation_tables/elu_table.tb | 1 + .../nnet_utils/activation_tables/exp_table.tb | 1 + .../activation_tables/exp_table_latency.tb | 1 + .../activation_tables/exp_table_legacy.tb | 1 + .../activation_tables/invert_table.tb | 1 + .../activation_tables/invert_table_latency.tb | 1 + .../activation_tables/invert_table_legacy.tb | 1 + .../activation_tables/selu_table.tb | 1 + .../activation_tables/sigmoid_table.tb | 1 + .../activation_tables/softplus_table.tb | 1 + .../activation_tables/softsign_table.tb | 1 + .../activation_tables/tanh_table.tb | 1 + .../src/firmware/nnet_utils/nnet_activation.h | 499 ++++++++++++ .../nnet_utils/nnet_activation_stream.h | 712 ++++++++++++++++++ .../src/firmware/nnet_utils/nnet_batchnorm.h | 104 +++ .../nnet_utils/nnet_batchnorm_stream.h | 107 +++ .../src/firmware/nnet_utils/nnet_common.h | 76 ++ .../src/firmware/nnet_utils/nnet_conv1d.h | 61 ++ .../nnet_utils/nnet_conv1d_resource.h | 237 ++++++ .../firmware/nnet_utils/nnet_conv1d_stream.h | 177 +++++ .../src/firmware/nnet_utils/nnet_conv2d.h | 67 ++ .../nnet_utils/nnet_conv2d_resource.h | 297 ++++++++ .../firmware/nnet_utils/nnet_conv2d_stream.h | 241 ++++++ .../src/firmware/nnet_utils/nnet_dense.h | 164 ++++ .../firmware/nnet_utils/nnet_dense_stream.h | 23 + .../firmware/nnet_utils/nnet_depthconv1d.h | 19 + 
.../nnet_utils/nnet_depthconv1d_resource.h | 60 ++ .../firmware/nnet_utils/nnet_depthconv2d.h | 19 + .../nnet_utils/nnet_depthconv2d_resource.h | 76 ++ .../src/firmware/nnet_utils/nnet_embed.h | 43 ++ .../firmware/nnet_utils/nnet_embed_stream.h | 31 + .../src/firmware/nnet_utils/nnet_helpers.h | 118 +++ .../src/firmware/nnet_utils/nnet_merge.h | 233 ++++++ .../firmware/nnet_utils/nnet_merge_stream.h | 359 +++++++++ .../src/firmware/nnet_utils/nnet_mult.h | 113 +++ .../src/firmware/nnet_utils/nnet_padding.h | 104 +++ .../firmware/nnet_utils/nnet_padding_stream.h | 81 ++ .../src/firmware/nnet_utils/nnet_pooling.h | 257 +++++++ .../firmware/nnet_utils/nnet_pooling_stream.h | 322 ++++++++ .../src/firmware/nnet_utils/nnet_printf.h | 18 + .../src/firmware/nnet_utils/nnet_recurrent.h | 566 ++++++++++++++ .../nnet_utils/nnet_recurrent_activation.h | 47 ++ .../nnet_utils/nnet_recurrent_stream.h | 68 ++ .../src/firmware/nnet_utils/nnet_resize.h | 36 + .../firmware/nnet_utils/nnet_resize_stream.h | 58 ++ .../src/firmware/nnet_utils/nnet_stream.h | 126 ++++ .../src/firmware/nnet_utils/nnet_transpose.h | 37 + .../nnet_utils/nnet_transpose_stream.h | 39 + .../src/firmware/nnet_utils/nnet_types.h | 71 ++ .../dense_streaming/src/firmware/parameters.h | 71 ++ .../dense_streaming/src/firmware/weights/b2.h | 11 + .../dense_streaming/src/firmware/weights/b4.h | 11 + .../dense_streaming/src/firmware/weights/w2.h | 11 + .../dense_streaming/src/firmware/weights/w4.h | 11 + .../dense_streaming/src/myproject_bridge.cpp | 86 +++ .../dense_streaming/src/myproject_test.cpp | 150 ++++ experiments/hostpipe_parallel/CMakeLists.txt | 338 +++++++++ .../hostpipe_parallel/hls4ml_config.yml | 17 + .../hostpipe_parallel/keras_model.keras | Bin 0 -> 24984 bytes .../src/exception_handler.hpp | 21 + .../hostpipe_parallel/src/firmware/defines.h | 40 + .../src/firmware/myproject.cpp | 39 + .../src/firmware/myproject.h | 33 + .../nnet_utils/activation_tables/elu_table.tb | 1 + 
.../nnet_utils/activation_tables/exp_table.tb | 1 + .../activation_tables/exp_table_latency.tb | 1 + .../activation_tables/exp_table_legacy.tb | 1 + .../activation_tables/invert_table.tb | 1 + .../activation_tables/invert_table_latency.tb | 1 + .../activation_tables/invert_table_legacy.tb | 1 + .../activation_tables/selu_table.tb | 1 + .../activation_tables/sigmoid_table.tb | 1 + .../activation_tables/softplus_table.tb | 1 + .../activation_tables/softsign_table.tb | 1 + .../activation_tables/tanh_table.tb | 1 + .../src/firmware/nnet_utils/nnet_activation.h | 499 ++++++++++++ .../nnet_utils/nnet_activation_stream.h | 712 ++++++++++++++++++ .../src/firmware/nnet_utils/nnet_batchnorm.h | 104 +++ .../nnet_utils/nnet_batchnorm_stream.h | 107 +++ .../src/firmware/nnet_utils/nnet_common.h | 76 ++ .../src/firmware/nnet_utils/nnet_conv1d.h | 61 ++ .../nnet_utils/nnet_conv1d_resource.h | 237 ++++++ .../firmware/nnet_utils/nnet_conv1d_stream.h | 177 +++++ .../src/firmware/nnet_utils/nnet_conv2d.h | 67 ++ .../nnet_utils/nnet_conv2d_resource.h | 297 ++++++++ .../firmware/nnet_utils/nnet_conv2d_stream.h | 241 ++++++ .../src/firmware/nnet_utils/nnet_dense.h | 164 ++++ .../firmware/nnet_utils/nnet_dense_stream.h | 23 + .../firmware/nnet_utils/nnet_depthconv1d.h | 19 + .../nnet_utils/nnet_depthconv1d_resource.h | 60 ++ .../firmware/nnet_utils/nnet_depthconv2d.h | 19 + .../nnet_utils/nnet_depthconv2d_resource.h | 76 ++ .../src/firmware/nnet_utils/nnet_embed.h | 43 ++ .../firmware/nnet_utils/nnet_embed_stream.h | 31 + .../src/firmware/nnet_utils/nnet_helpers.h | 118 +++ .../src/firmware/nnet_utils/nnet_merge.h | 233 ++++++ .../firmware/nnet_utils/nnet_merge_stream.h | 359 +++++++++ .../src/firmware/nnet_utils/nnet_mult.h | 113 +++ .../src/firmware/nnet_utils/nnet_padding.h | 104 +++ .../firmware/nnet_utils/nnet_padding_stream.h | 81 ++ .../src/firmware/nnet_utils/nnet_pooling.h | 257 +++++++ .../firmware/nnet_utils/nnet_pooling_stream.h | 322 ++++++++ 
.../src/firmware/nnet_utils/nnet_printf.h | 18 + .../src/firmware/nnet_utils/nnet_recurrent.h | 566 ++++++++++++++ .../nnet_utils/nnet_recurrent_activation.h | 47 ++ .../nnet_utils/nnet_recurrent_stream.h | 68 ++ .../src/firmware/nnet_utils/nnet_resize.h | 36 + .../firmware/nnet_utils/nnet_resize_stream.h | 58 ++ .../src/firmware/nnet_utils/nnet_stream.h | 126 ++++ .../src/firmware/nnet_utils/nnet_transpose.h | 37 + .../nnet_utils/nnet_transpose_stream.h | 39 + .../src/firmware/nnet_utils/nnet_types.h | 71 ++ .../src/firmware/parameters.h | 163 ++++ .../src/firmware/weights/b2.h | 11 + .../src/firmware/weights/b5.h | 11 + .../src/firmware/weights/br5.h | 11 + .../src/firmware/weights/w2.h | 11 + .../src/firmware/weights/w5.h | 11 + .../src/firmware/weights/wr5.h | 11 + .../src/myproject_bridge.cpp | 86 +++ .../hostpipe_parallel/src/myproject_test.cpp | 150 ++++ experiments/mlp_streaming/CMakeLists.txt | 338 +++++++++ experiments/mlp_streaming/hls4ml_config.yml | 54 ++ experiments/mlp_streaming/keras_model.keras | Bin 0 -> 18320 bytes .../mlp_streaming/src/exception_handler.hpp | 21 + .../mlp_streaming/src/firmware/defines.h | 39 + .../mlp_streaming/src/firmware/myproject.cpp | 43 ++ .../mlp_streaming/src/firmware/myproject.h | 33 + .../nnet_utils/activation_tables/elu_table.tb | 1 + .../nnet_utils/activation_tables/exp_table.tb | 1 + .../activation_tables/exp_table_latency.tb | 1 + .../activation_tables/exp_table_legacy.tb | 1 + .../activation_tables/invert_table.tb | 1 + .../activation_tables/invert_table_latency.tb | 1 + .../activation_tables/invert_table_legacy.tb | 1 + .../activation_tables/selu_table.tb | 1 + .../activation_tables/sigmoid_table.tb | 1 + .../activation_tables/softplus_table.tb | 1 + .../activation_tables/softsign_table.tb | 1 + .../activation_tables/tanh_table.tb | 1 + .../src/firmware/nnet_utils/nnet_activation.h | 499 ++++++++++++ .../nnet_utils/nnet_activation_stream.h | 712 ++++++++++++++++++ .../src/firmware/nnet_utils/nnet_batchnorm.h | 
104 +++ .../nnet_utils/nnet_batchnorm_stream.h | 107 +++ .../src/firmware/nnet_utils/nnet_common.h | 76 ++ .../src/firmware/nnet_utils/nnet_conv1d.h | 61 ++ .../nnet_utils/nnet_conv1d_resource.h | 237 ++++++ .../firmware/nnet_utils/nnet_conv1d_stream.h | 177 +++++ .../src/firmware/nnet_utils/nnet_conv2d.h | 67 ++ .../nnet_utils/nnet_conv2d_resource.h | 297 ++++++++ .../firmware/nnet_utils/nnet_conv2d_stream.h | 241 ++++++ .../src/firmware/nnet_utils/nnet_dense.h | 164 ++++ .../firmware/nnet_utils/nnet_dense_stream.h | 23 + .../firmware/nnet_utils/nnet_depthconv1d.h | 19 + .../nnet_utils/nnet_depthconv1d_resource.h | 60 ++ .../firmware/nnet_utils/nnet_depthconv2d.h | 19 + .../nnet_utils/nnet_depthconv2d_resource.h | 76 ++ .../src/firmware/nnet_utils/nnet_embed.h | 43 ++ .../firmware/nnet_utils/nnet_embed_stream.h | 31 + .../src/firmware/nnet_utils/nnet_helpers.h | 118 +++ .../src/firmware/nnet_utils/nnet_merge.h | 233 ++++++ .../firmware/nnet_utils/nnet_merge_stream.h | 359 +++++++++ .../src/firmware/nnet_utils/nnet_mult.h | 113 +++ .../src/firmware/nnet_utils/nnet_padding.h | 104 +++ .../firmware/nnet_utils/nnet_padding_stream.h | 81 ++ .../src/firmware/nnet_utils/nnet_pooling.h | 257 +++++++ .../firmware/nnet_utils/nnet_pooling_stream.h | 322 ++++++++ .../src/firmware/nnet_utils/nnet_printf.h | 18 + .../src/firmware/nnet_utils/nnet_recurrent.h | 566 ++++++++++++++ .../nnet_utils/nnet_recurrent_activation.h | 47 ++ .../nnet_utils/nnet_recurrent_stream.h | 68 ++ .../src/firmware/nnet_utils/nnet_resize.h | 36 + .../firmware/nnet_utils/nnet_resize_stream.h | 58 ++ .../src/firmware/nnet_utils/nnet_stream.h | 126 ++++ .../src/firmware/nnet_utils/nnet_transpose.h | 37 + .../nnet_utils/nnet_transpose_stream.h | 39 + .../src/firmware/nnet_utils/nnet_types.h | 71 ++ .../mlp_streaming/src/firmware/parameters.h | 89 +++ .../mlp_streaming/src/firmware/weights/b2.h | 11 + .../mlp_streaming/src/firmware/weights/b5.h | 11 + .../mlp_streaming/src/firmware/weights/w2.h | 11 + 
.../mlp_streaming/src/firmware/weights/w5.h | 11 + .../mlp_streaming/src/myproject_bridge.cpp | 86 +++ .../mlp_streaming/src/myproject_test.cpp | 150 ++++ experiments/requirements.txt | 14 + 194 files changed, 20150 insertions(+) create mode 100644 experiments/.gitignore create mode 100644 experiments/conv1d_gru_example.ipynb create mode 100644 experiments/dense_streaming/CMakeLists.txt create mode 100644 experiments/dense_streaming/hls4ml_config.yml create mode 100644 experiments/dense_streaming/keras_model.keras create mode 100644 experiments/dense_streaming/src/exception_handler.hpp create mode 100644 experiments/dense_streaming/src/firmware/defines.h create mode 100644 experiments/dense_streaming/src/firmware/myproject.cpp create mode 100644 experiments/dense_streaming/src/firmware/myproject.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/elu_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/selu_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softplus_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softsign_table.tb create mode 
100644 experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/tanh_table.tb create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_common.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_resource.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_resource.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d_resource.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d_resource.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_helpers.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge.h create mode 
100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_mult.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_printf.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_activation.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose_stream.h create mode 100644 experiments/dense_streaming/src/firmware/nnet_utils/nnet_types.h create mode 100644 experiments/dense_streaming/src/firmware/parameters.h create mode 100644 experiments/dense_streaming/src/firmware/weights/b2.h create mode 100644 experiments/dense_streaming/src/firmware/weights/b4.h create mode 100644 experiments/dense_streaming/src/firmware/weights/w2.h create mode 100644 experiments/dense_streaming/src/firmware/weights/w4.h create mode 100644 experiments/dense_streaming/src/myproject_bridge.cpp create mode 100644 experiments/dense_streaming/src/myproject_test.cpp create mode 100644 experiments/hostpipe_parallel/CMakeLists.txt create mode 
100644 experiments/hostpipe_parallel/hls4ml_config.yml create mode 100644 experiments/hostpipe_parallel/keras_model.keras create mode 100644 experiments/hostpipe_parallel/src/exception_handler.hpp create mode 100644 experiments/hostpipe_parallel/src/firmware/defines.h create mode 100644 experiments/hostpipe_parallel/src/firmware/myproject.cpp create mode 100644 experiments/hostpipe_parallel/src/firmware/myproject.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/elu_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/exp_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/invert_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/selu_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/softplus_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/softsign_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/activation_tables/tanh_table.tb create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_activation.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_activation_stream.h create mode 100644 
experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_batchnorm.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_batchnorm_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_common.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv1d.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv1d_resource.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv1d_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv2d.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv2d_resource.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_conv2d_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_dense.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_dense_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_depthconv1d.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_depthconv1d_resource.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_depthconv2d.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_depthconv2d_resource.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_embed.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_embed_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_helpers.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_merge.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_merge_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_mult.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_padding.h create 
mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_padding_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_pooling.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_pooling_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_printf.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_recurrent.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_recurrent_activation.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_recurrent_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_resize.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_resize_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_transpose.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_transpose_stream.h create mode 100644 experiments/hostpipe_parallel/src/firmware/nnet_utils/nnet_types.h create mode 100644 experiments/hostpipe_parallel/src/firmware/parameters.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/b2.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/b5.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/br5.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/w2.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/w5.h create mode 100644 experiments/hostpipe_parallel/src/firmware/weights/wr5.h create mode 100644 experiments/hostpipe_parallel/src/myproject_bridge.cpp create mode 100644 experiments/hostpipe_parallel/src/myproject_test.cpp create mode 100644 experiments/mlp_streaming/CMakeLists.txt create mode 100644 experiments/mlp_streaming/hls4ml_config.yml create mode 
100644 experiments/mlp_streaming/keras_model.keras create mode 100644 experiments/mlp_streaming/src/exception_handler.hpp create mode 100644 experiments/mlp_streaming/src/firmware/defines.h create mode 100644 experiments/mlp_streaming/src/firmware/myproject.cpp create mode 100644 experiments/mlp_streaming/src/firmware/myproject.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/elu_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/exp_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/invert_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/selu_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/softplus_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/softsign_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/activation_tables/tanh_table.tb create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_activation.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_activation_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_batchnorm.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_batchnorm_stream.h create mode 100644 
experiments/mlp_streaming/src/firmware/nnet_utils/nnet_common.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv1d.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv1d_resource.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv1d_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv2d.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv2d_resource.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_conv2d_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_dense.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_dense_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_depthconv1d.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_depthconv1d_resource.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_depthconv2d.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_depthconv2d_resource.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_embed.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_embed_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_helpers.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_merge.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_merge_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_mult.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_padding.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_padding_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_pooling.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_pooling_stream.h create 
mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_printf.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_recurrent.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_recurrent_activation.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_recurrent_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_resize.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_resize_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_transpose.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_transpose_stream.h create mode 100644 experiments/mlp_streaming/src/firmware/nnet_utils/nnet_types.h create mode 100644 experiments/mlp_streaming/src/firmware/parameters.h create mode 100644 experiments/mlp_streaming/src/firmware/weights/b2.h create mode 100644 experiments/mlp_streaming/src/firmware/weights/b5.h create mode 100644 experiments/mlp_streaming/src/firmware/weights/w2.h create mode 100644 experiments/mlp_streaming/src/firmware/weights/w5.h create mode 100644 experiments/mlp_streaming/src/myproject_bridge.cpp create mode 100644 experiments/mlp_streaming/src/myproject_test.cpp create mode 100644 experiments/requirements.txt diff --git a/experiments/.gitignore b/experiments/.gitignore new file mode 100644 index 0000000000..f7275bbbd0 --- /dev/null +++ b/experiments/.gitignore @@ -0,0 +1 @@ +venv/ diff --git a/experiments/conv1d_gru_example.ipynb b/experiments/conv1d_gru_example.ipynb new file mode 100644 index 0000000000..bf829b06f6 --- /dev/null +++ b/experiments/conv1d_gru_example.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5fe41dbe-da97-4f29-a20b-05145fdecfe1", + "metadata": {}, + "source": [ + "FPGA ML inference with oneAPI backend" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "a9b6e6fd-a294-44bc-8b80-6e5d5408f877", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/intel/oneapi/compiler/2024.2/bin/icpx\n" + ] + } + ], + "source": [ + "!which icpx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8597340-7981-4207-967a-30e9501b8925", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' \n", + "\n", + "import tensorflow as tf; tf.get_logger().setLevel('INFO')\n", + "\n", + "import hls4ml\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, GRU, Conv1D, Conv2D, Flatten, MaxPool1D, MaxPool2D, Activation, BatchNormalization, Dropout\n", + "from tensorflow.keras.losses import MSE\n", + "from tensorflow.keras.optimizers import Adam\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "9b8ee1f2-9540-4a6f-8984-172034a72d92", + "metadata": {}, + "source": [ + "Example of Conv1D layers followed by GRU (dma hostpipe)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5e2f6fd6-f2fa-44a7-b749-a46a9405c4d4", + "metadata": {}, + "outputs": [], + "source": [ + "model = Sequential()\n", + "model.add(Conv1D(16, kernel_size=3, padding='same', input_shape=(32,3)))\n", + "model.add(Activation(activation='relu', name='relu1'))\n", + "model.add(GRU(16))\n", + "model.compile(loss='mse', optimizer=Adam())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "471fe60b-2f09-47cb-b21d-410e6c327017", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Interpreting Sequential\n", + "Topology:\n", + "Layer name: conv1d_4_input, layer type: InputLayer, input shapes: [[None, 32, 3]], output shape: [None, 32, 3]\n", + "Layer name: conv1d_4, layer type: Conv1D, input 
shapes: [[None, 32, 3]], output shape: [None, 32, 16]\n", + "Layer name: relu1, layer type: Activation, input shapes: [[None, 32, 16]], output shape: [None, 32, 16]\n", + "Layer name: gru_4, layer type: GRU, input shapes: [[None, 32, 16]], output shape: [None, 16]\n", + "Creating HLS model\n" + ] + } + ], + "source": [ + "hls_model = hls4ml.converters.convert_from_keras_model(\n", + " model=model,\n", + " output_dir=\"hostpipe\",\n", + " backend=\"oneAPI\",\n", + " part=\"Agilex7\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "1e1d1b3f-2eaf-4cc3-bbff-ddd39de27d4f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing HLS project\n", + "Done\n" + ] + } + ], + "source": [ + "# Write the project to disk and invoke oneAPI backend\n", + "hls_model.compile()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "9c9376b6-92ac-405b-a9da-60f960575011", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.34667969, -0.32226562, 0.49316406, -0.2265625 , -0.48046875,\n", + " -0.55273438, -0.04882812, -0.16699219, 0.21972656, -0.31347656,\n", + " 0.25390625, -0.15820312, 0.17578125, 0.2890625 , 0.30957031,\n", + " 0.06054688],\n", + " [ 0.34667969, -0.32226562, 0.49316406, -0.2265625 , -0.48046875,\n", + " -0.55273438, -0.04882812, -0.16699219, 0.21972656, -0.31347656,\n", + " 0.25390625, -0.15820312, 0.17578125, 0.2890625 , 0.30957031,\n", + " 0.06054688],\n", + " [ 0.34667969, -0.32226562, 0.49316406, -0.2265625 , -0.48046875,\n", + " -0.55273438, -0.04882812, -0.16699219, 0.21972656, -0.31347656,\n", + " 0.25390625, -0.15820312, 0.17578125, 0.2890625 , 0.30957031,\n", + " 0.06054688]])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test = np.ones((3, 32, 3))\n", + "hls_model.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "id": "2e5d80cc", + "metadata": 
{}, + "source": [ + "Get dense streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "faf22038", + "metadata": {}, + "outputs": [], + "source": [ + "def get_dense():\n", + " model = Sequential()\n", + " model.add(Dense(4, input_shape=(8,), name='fc1'))\n", + " model.add(Dense(2, name='fc2'))\n", + " model.compile()\n", + " model.summary()\n", + " config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='ac_fixed<16, 6>')\n", + " hls_model = hls4ml.converters.convert_from_keras_model(model=model, output_dir=\"dense_streaming\", backend=\"oneAPI\", part=\"Agilex7\", io_type=\"io_stream\", hls_config=config)\n", + " return model, config, hls_model\n", + "mlp_cpu, config, mlp_hls = get_dense()\n", + "mlp_hls.compile()\n", + "mlp_hls.predict(np.ones(8))" + ] + }, + { + "cell_type": "markdown", + "id": "d151759e-e371-43c3-b1ec-c0e6d7c59c47", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "MLP streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cff8d32-c2f4-41b0-8ee5-e4d5bc2b7ea2", + "metadata": {}, + "outputs": [], + "source": [ + "def get_mlp():\n", + " model = Sequential()\n", + " model.add(Dense(4, input_shape=(8,), name='fc1'))\n", + " model.add(Activation(activation='relu', name='relu1'))\n", + " model.add(Dense(2, name='fc2'))\n", + " model.add(Activation(activation='relu', name='relu2'))\n", + " model.compile()\n", + " model.summary()\n", + " config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='ac_fixed<16, 6>')\n", + " hls_model = hls4ml.converters.convert_from_keras_model(model=model, output_dir=\"mlp_streaming\", backend=\"oneAPI\", part=\"Agilex7\", io_type=\"io_stream\", hls_config=config)\n", + " return model, config, hls_model\n", + "mlp_cpu, config, mlp_hls = get_mlp()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afa6da72-432f-4a28-916f-bfb098408ff3", + "metadata": {}, + 
"outputs": [], + "source": [ + "def get_larger_mlp():\n", + " model = Sequential()\n", + " model.add(Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform'))\n", + " model.add(Activation(activation='relu', name='relu1'))\n", + " model.add(Dense(32, name='fc2', kernel_initializer='lecun_uniform'))\n", + " model.add(Activation(activation='relu', name='relu2'))\n", + " model.add(Dense(32, name='fc3', kernel_initializer='lecun_uniform'))\n", + " model.add(Activation(activation='relu', name='relu3'))\n", + " model.add(Dense(5, name='output', kernel_initializer='lecun_uniform'))\n", + " model.add(Activation(activation='softmax', name='softmax'))\n", + " model.compile(loss='mse', optimizer=Adam())\n", + " model.summary()\n", + "\n", + " config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='ac_fixed<16, 6>')\n", + " hls_model = hls4ml.converters.convert_from_keras_model(model=model, output_dir=\"model_mlp_out\", backend=\"oneAPI\", part=\"Agilex7\", hls_config=config)\n", + "\n", + " return model, config, hls_model\n", + "\n", + "mlp_cpu, config, mlp_hls = get_larger_mlp()\n", + "mlp_hls.compile()\n", + "mlp_hls.predict(np.ones(8))" + ] + }, + { + "cell_type": "markdown", + "id": "bfa41472-1431-499b-93fe-8781ccb889d3", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "CNN - MNIST" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee582768-f534-4137-af45-d9ed37fb337f", + "metadata": {}, + "outputs": [], + "source": [ + "def get_cnn():\n", + " model = Sequential()\n", + " model.add(Conv2D(5, (4, 4), input_shape=(5, 5, 3)))\n", + " model.compile()\n", + " \n", + " config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='ac_fixed<16, 6>')\n", + " hls_model = hls4ml.converters.convert_from_keras_model(model=model, output_dir=\"model_cnn_out\", io_type=\"io_stream\", backend=\"oneAPI\", part=\"Agilex7\", hls_config=config)\n", + "\n", + " 
return model, config, hls_model\n", + "\n", + "cnn_cpu, config, cnn_hls = get_cnn()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73e2b580-5575-4ce5-b2d7-907ec3aaba59", + "metadata": {}, + "outputs": [], + "source": [ + "def get_cnn():\n", + " model = Sequential()\n", + " model.add(Conv2D(16, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 1)))\n", + " model.add(MaxPool2D(pool_size=(2, 2)))\n", + " model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))\n", + " model.add(MaxPool2D(pool_size=(2, 2)))\n", + " model.add(Flatten())\n", + " model.add(Dense(10, activation='softmax'))\n", + " model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", + " model.summary()\n", + "\n", + " config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='ac_fixed<16, 6>')\n", + " hls_model = hls4ml.converters.convert_from_keras_model(model=model, output_dir=\"model_cnn_out\", backend=\"oneAPI\", part=\"Agilex7\", hls_config=config)\n", + "\n", + " return model, config, hls_model\n", + "\n", + "cnn_cpu, config, cnn_hls = get_cnn()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "359f4663-e493-4776-9eb3-07ca054ca982", + "metadata": {}, + "outputs": [], + "source": [ + "cnn_hls.compile()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee82fbf2-6836-424a-b7a0-8f949eeede50", + "metadata": {}, + "outputs": [], + "source": [ + "cnn_cpu.predict(np.ones((1,5,5,3)))" + ] + }, + { + "cell_type": "markdown", + "id": "5f774f2f-c50e-463d-835d-09768beaea85", + "metadata": {}, + "source": [ + "## Library and Layer Support\n", + "\n", + "### ML framework support:\n", + "\n", + "(Q)Keras\n", + "\n", + "PyTorch\n", + "\n", + "(Q)ONNX (in development)\n", + "\n", + "### Neural network architectures:\n", + "\n", + "Fully connected NN (multilayer perceptron, MLP)\n", + "\n", + "Convolutional NN\n", + "\n", + "Recurrent NN (LSTM, GRU)\n", + 
"\n", + "Graph NN (GarNet)\n", + "\n", + "### Layers:\n", + "\n", + "- Core Layers\n", + "\n", + "InputLayer, Dropout, Flatten, Dense, TernaryDense, BinaryDense, Transpose, Resize\n", + "\n", + "- Convolution\n", + "\n", + "Conv1D, Conv2D\n", + "\n", + "- Pooling\n", + "\n", + "MaxPooling1D, MaxPooling2D, AveragePooling1D, AveragePooling2D\n", + "\n", + "- Normalization\n", + "\n", + "BatchNormalization\n", + "\n", + "- Activation\n", + "\n", + "LeakyReLU, ThresholdedReLU, Sigmoid, ELU, PReLU, TanH, Binary TanH, Softmax, Softsign, SELU Activation" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/experiments/dense_streaming/CMakeLists.txt b/experiments/dense_streaming/CMakeLists.txt new file mode 100644 index 0000000000..8e76a4c0bc --- /dev/null +++ b/experiments/dense_streaming/CMakeLists.txt @@ -0,0 +1,338 @@ +# Direct CMake to use icpx rather than the default C++ compiler/linker on Linux +# and icx-cl on Windows +if(UNIX) + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(myproject CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +############################################################################### +### Customize these build variables +############################################################################### +set(SOURCE_FILES src/firmware/myproject.cpp 
src/myproject_test.cpp) +set(LIBRARY_FILES src/firmware/myproject.cpp src/myproject_bridge.cpp) +set(LIB_STAMP ac2ad143) +set(TARGET_NAME myproject) +set(LIBRARY_NAME myproject-${LIB_STAMP}) + +# Use cmake -DFPGA_DEVICE=: to choose a +# different device. Here are a few device examples (this list is not +# exhaustive): +# intel_s10sx_pac:pac_s10 +# intel_s10sx_pac:pac_s10_usm +# intel_a10gx_pac:pac_a10 +# Note that depending on your installation, you may need to specify the full +# path to the board support package (BSP), this usually is in your install +# folder. +# +# You can also specify a device family (E.g. "Arria10" or "Stratix10") or a +# specific part number (E.g. "10AS066N3F40E2SG") to generate a standalone IP. +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex7") +endif() + +# Use cmake -DUSER_FPGA_FLAGS= to set extra flags for FPGA backend +# compilation. +set(USER_FPGA_FLAGS -Wno-unused-label ${USER_FPGA_FLAGS}) + +# Use cmake -DUSER_FLAGS= to set extra flags for general compilation. +set(USER_FLAGS -Wno-unused-label -fconstexpr-steps=134217728 ${USER_FLAGS}) + +# Use cmake -DUSER_INCLUDE_PATHS= to set extra paths for general +# compilation. 
+set(USER_INCLUDE_PATHS src;src/firmware;${USER_INCLUDE_PATHS}) + +############################################################################### +### no changes after here +############################################################################### + +# Print the device being used for the compiles +message(STATUS "Configuring the design to run on FPGA board ${FPGA_DEVICE}") + +# Set the names of the makefile targets to be generated by cmake +set(EMULATOR_TARGET fpga_emu) +set(SIMULATOR_TARGET fpga_sim) +set(REPORT_TARGET report) +set(FPGA_TARGET fpga) +set(IP_EXPORT_TARGET fpga_ip_export) +set(LIBRARY_TARGET lib) + +# Set the names of the generated files per makefile target +set(EMULATOR_OUTPUT_NAME ${TARGET_NAME}.${EMULATOR_TARGET}) +set(SIMULATOR_OUTPUT_NAME ${TARGET_NAME}.${SIMULATOR_TARGET}) +set(REPORT_OUTPUT_NAME ${TARGET_NAME}.${REPORT_TARGET}) +set(FPGA_OUTPUT_NAME ${TARGET_NAME}.${FPGA_TARGET}) +set(IP_EXPORT_OUTPUT_NAME ${TARGET_NAME}.${IP_EXPORT_TARGET}) + +message(STATUS "Additional USER_FPGA_FLAGS=${USER_FPGA_FLAGS}") +message(STATUS "Additional USER_FLAGS=${USER_FLAGS}") + +include_directories(${USER_INCLUDE_PATHS}) +message(STATUS "Additional USER_INCLUDE_PATHS=${USER_INCLUDE_PATHS}") + +link_directories(${USER_LIB_PATHS}) +message(STATUS "Additional USER_LIB_PATHS=${USER_LIB_PATHS}") + +link_libraries(${USER_LIBS}) +message(STATUS "Additional USER_LIBS=${USER_LIBS}") + +if(WIN32) + # add qactypes for Windows + set(QACTYPES "-Qactypes") + # This is a Windows-specific flag that enables exception handling in host code + set(WIN_FLAG "/EHsc") +else() + # add qactypes for Linux + set(QACTYPES "-qactypes") +endif() + +set(COMMON_COMPILE_FLAGS -fsycl -fintelfpga -Wall ${WIN_FLAG} ${QACTYPES} ${USER_FLAGS}) +# for debugging need to do this. 
Not sure why +# set(COMMON_LINK_FLAGS -L/opt/intel/oneapi/compiler/2024.0/opt/oclfpga/host/linux64/lib -fsycl -fintelfpga ${QACTYPES} ${USER_FLAGS}) +set(COMMON_LINK_FLAGS -fsycl -fintelfpga ${QACTYPES} ${USER_FLAGS}) + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate +# representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. For +# this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS -DFPGA_EMULATOR) +set(LIBRARY_COMPILE_FLAGS -DFPGA_EMULATOR) +set(EMULATOR_LINK_FLAGS ) +set(LIBRARY_LINK_FLAGS -L$ENV{FPGA_VARS_DIR}/host/linux64/lib) +set(REPORT_COMPILE_FLAGS -DFPGA_HARDWARE) +set(REPORT_LINK_FLAGS -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -fsycl-link=early) +set(SIMULATOR_COMPILE_FLAGS -Xssimulation -DFPGA_SIMULATOR) +set(SIMULATOR_LINK_FLAGS -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_OUTPUT_NAME}) +set(FPGA_COMPILE_FLAGS -DFPGA_HARDWARE) +set(FPGA_LINK_FLAGS -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_OUTPUT_NAME}) +# get rid of this once host pipes work properly +set(IP_EXPORT_COMPILE_FLAGS -DFPGA_HARDWARE) +set(IP_EXPORT_LINK_FLAGS -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -fsycl-link=early -fsycl-device-code-split=per_kernel) + +############################################################################### +### FPGA Emulator library +############################################################################### +add_library(${LIBRARY_TARGET} SHARED ${LIBRARY_FILES}) +target_compile_options(${LIBRARY_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${LIBRARY_TARGET} PRIVATE ${LIBRARY_COMPILE_FLAGS}) +target_link_libraries(${LIBRARY_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${LIBRARY_TARGET} 
${LIBRARY_LINK_FLAGS}) +set_target_properties(${LIBRARY_TARGET} PROPERTIES OUTPUT_NAME ${LIBRARY_NAME}) + +############################################################################### +### FPGA Emulator +############################################################################### +add_executable(${EMULATOR_TARGET} ${SOURCE_FILES}) +target_compile_options(${EMULATOR_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${EMULATOR_TARGET} PRIVATE ${EMULATOR_COMPILE_FLAGS}) +target_link_libraries(${EMULATOR_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${EMULATOR_TARGET} ${EMULATOR_LINK_FLAGS}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES OUTPUT_NAME ${EMULATOR_OUTPUT_NAME}) + +############################################################################### +### FPGA Simulator +############################################################################### +add_executable(${SIMULATOR_TARGET} ${SOURCE_FILES}) +target_compile_options(${SIMULATOR_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${SIMULATOR_TARGET} PRIVATE ${SIMULATOR_COMPILE_FLAGS}) +target_link_libraries(${SIMULATOR_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${SIMULATOR_TARGET} ${SIMULATOR_LINK_FLAGS}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES OUTPUT_NAME ${SIMULATOR_OUTPUT_NAME}) + +############################################################################### +### Generate Report +############################################################################### +add_executable(${REPORT_TARGET} ${SOURCE_FILES}) +target_compile_options(${REPORT_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${REPORT_TARGET} PRIVATE ${REPORT_COMPILE_FLAGS}) + +# The report target does not need the QACTYPES flag at link stage +set(MODIFIED_COMMON_LINK_FLAGS_REPORT ${COMMON_LINK_FLAGS}) +list(REMOVE_ITEM MODIFIED_COMMON_LINK_FLAGS_REPORT ${QACTYPES}) + +target_link_libraries(${REPORT_TARGET} ${MODIFIED_COMMON_LINK_FLAGS_REPORT}) 
+target_link_libraries(${REPORT_TARGET} ${REPORT_LINK_FLAGS}) +set_target_properties(${REPORT_TARGET} PROPERTIES OUTPUT_NAME ${REPORT_OUTPUT_NAME}) + +############################################################################### +### FPGA Hardware +############################################################################### +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILES}) +target_compile_options(${FPGA_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${FPGA_TARGET} PRIVATE ${FPGA_COMPILE_FLAGS}) +target_link_libraries(${FPGA_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${FPGA_TARGET} ${FPGA_LINK_FLAGS}) +set_target_properties(${FPGA_TARGET} PROPERTIES OUTPUT_NAME ${FPGA_OUTPUT_NAME}) + +############################################################################### +### FPGA IP Export (only necessary until native host pipes) +############################################################################### +add_executable(${IP_EXPORT_TARGET} ${SOURCE_FILES}) +target_compile_options(${IP_EXPORT_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${IP_EXPORT_TARGET} PRIVATE ${IP_EXPORT_COMPILE_FLAGS}) + +# The ip export target does not need the QACTYPES flag at link stage +set(MODIFIED_COMMON_LINK_FLAGS_EXPORT ${COMMON_LINK_FLAGS}) +list(REMOVE_ITEM MODIFIED_COMMON_LINK_FLAGS_EXPORT ${QACTYPES}) + +target_link_libraries(${IP_EXPORT_TARGET} ${MODIFIED_COMMON_LINK_FLAGS_EXPORT}) +target_link_libraries(${IP_EXPORT_TARGET} ${IP_EXPORT_LINK_FLAGS}) +set_target_properties(${IP_EXPORT_TARGET} PROPERTIES OUTPUT_NAME ${IP_EXPORT_OUTPUT_NAME}) + +############################################################################### +### This part only manipulates cmake variables to print the commands to the user +############################################################################### + +# set the correct object file extension depending on the target platform +if(WIN32) + set(OBJ_EXTENSION "obj") +else() + 
set(OBJ_EXTENSION "o") +endif() + +# Set the source file names in a string +set(SOURCE_FILE_NAME "${SOURCE_FILES}") + +function(getCompileCommands common_compile_flags special_compile_flags common_link_flags special_link_flags target output_name) + + set(file_names ${SOURCE_FILE_NAME}) + set(COMPILE_COMMAND ) + set(LINK_COMMAND ) + + foreach(source ${file_names}) + # Get the relative path to the source and object files + file(RELATIVE_PATH CURRENT_SOURCE_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${source}) + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Creating a string that contains the compile command + # Start by the compiler invocation + set(COMPILE_COMMAND "${COMPILE_COMMAND}${CMAKE_CXX_COMPILER}") + + # Add all the potential includes + foreach(INCLUDE ${USER_INCLUDE_PATHS}) + if(NOT IS_ABSOLUTE ${INCLUDE}) + file(RELATIVE_PATH INCLUDE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${INCLUDE}) + endif() + set(COMPILE_COMMAND "${COMPILE_COMMAND} -I${INCLUDE}") + endforeach() + + # Add all the common compile flags + foreach(FLAG ${common_compile_flags}) + set(COMPILE_COMMAND "${COMPILE_COMMAND} ${FLAG}") + endforeach() + + # Add all the specific compile flags + foreach(FLAG ${special_compile_flags}) + set(COMPILE_COMMAND "${COMPILE_COMMAND} ${FLAG}") + endforeach() + + # Get the location of the object file + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Add the source file and the output file + set(COMPILE_COMMAND "${COMPILE_COMMAND} -c ${CURRENT_SOURCE_FILE} -o ${OBJ_FILE}\n") + endforeach() + + set(COMPILE_COMMAND "${COMPILE_COMMAND}" PARENT_SCOPE) + + # Creating a string that contains the link command + # Start by the compiler invocation + set(LINK_COMMAND "${LINK_COMMAND}${CMAKE_CXX_COMPILER}") + + # Add all the common link flags + 
foreach(FLAG ${common_link_flags}) + set(LINK_COMMAND "${LINK_COMMAND} ${FLAG}") + endforeach() + + # Add all the specific link flags + foreach(FLAG ${special_link_flags}) + set(LINK_COMMAND "${LINK_COMMAND} ${FLAG}") + endforeach() + + # Add the output file + set(LINK_COMMAND "${LINK_COMMAND} -o ${output_name}") + + foreach(source ${file_names}) + # Get the relative path to the source and object files + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Add the source file and the output file + set(LINK_COMMAND "${LINK_COMMAND} ${OBJ_FILE}") + endforeach() + + # Add all the potential library paths + foreach(LIB_PATH ${USER_LIB_PATHS}) + if(NOT IS_ABSOLUTE ${LIB_PATH}) + file(RELATIVE_PATH LIB_PATH ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${LIB_PATH}) + endif() + if(NOT WIN32) + set(LINK_COMMAND "${LINK_COMMAND} -L${LIB_PATH}") + else() + set(LINK_COMMAND "${LINK_COMMAND} -L${LIB_PATH} -Wl,-rpath,${LIB_PATH}") + endif() + endforeach() + + # Add all the potential includes + foreach(LIB ${USER_LIBS}) + set(LINK_COMMAND "${LINK_COMMAND} -l${LIB}") + endforeach() + + set(LINK_COMMAND "${LINK_COMMAND}" PARENT_SCOPE) + +endfunction() + +# Windows executable is going to have the .exe extension +if(WIN32) + set(EXECUTABLE_EXTENSION ".exe") +endif() + +# Display the compile instructions in the emulation flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${EMULATOR_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${EMULATOR_LINK_FLAGS}" "${EMULATOR_TARGET}" "${EMULATOR_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayEmulationCompileCommands ALL + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${EMULATOR_TARGET} displayEmulationCompileCommands) + +# Display the compile instructions in the simulation flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" 
"${SIMULATOR_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${SIMULATOR_LINK_FLAGS}" "${SIMULATOR_TARGET}" "${SIMULATOR_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displaySimulationCompileCommands ALL + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${SIMULATOR_TARGET} displaySimulationCompileCommands) + +# Display the compile instructions in the report flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${REPORT_COMPILE_FLAGS}" "${MODIFIED_COMMON_LINK_FLAGS_REPORT}" "${REPORT_LINK_FLAGS}" "${REPORT_TARGET}" "${REPORT_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayReportCompileCommands ALL + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${REPORT_TARGET} displayReportCompileCommands) + +# Display the compile instructions in the IP export flow (Remove after native host pipes work properly) +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${IP_EXPORT_COMPILE_FLAGS}" "${MODIFIED_COMMON_LINK_FLAGS_EXPORT}" "${IP_EXPORT_LINK_FLAGS}" "${IP_EXPORT_TARGET}" "${IP_EXPORT_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayExportCompileCommands ALL + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${IP_EXPORT_TARGET} displayExportCompileCommands) + +# Display the compile instructions in the fpga flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${FPGA_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${FPGA_LINK_FLAGS}" "${FPGA_TARGET}" "${FPGA_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayFPGACompileCommands ALL + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${FPGA_TARGET} 
displayFPGACompileCommands) diff --git a/experiments/dense_streaming/hls4ml_config.yml b/experiments/dense_streaming/hls4ml_config.yml new file mode 100644 index 0000000000..106a966be1 --- /dev/null +++ b/experiments/dense_streaming/hls4ml_config.yml @@ -0,0 +1,46 @@ +Backend: oneAPI +ClockPeriod: 5 +HLSConfig: + LayerName: + fc1: + Precision: + bias: auto + result: auto + weight: auto + Trace: false + fc1_input: + Precision: + result: auto + Trace: false + fc1_linear: + Precision: + result: auto + Trace: false + fc2: + Precision: + bias: auto + result: auto + weight: auto + Trace: false + fc2_linear: + Precision: + result: auto + Trace: false + Model: + BramFactor: 1000000000 + Precision: + default: ac_fixed<16, 6> + ReuseFactor: 1 + Strategy: Latency + TraceOutput: false +IOType: io_stream +InputData: null +KerasModel: !keras_model 'dense_streaming/keras_model.keras' +OutputDir: dense_streaming +OutputPredictions: null +Part: Agilex7 +ProjectName: myproject +Stamp: ac2ad143 +Version: 1.0.0 +WriterConfig: + WriteTar: false diff --git a/experiments/dense_streaming/keras_model.keras b/experiments/dense_streaming/keras_model.keras new file mode 100644 index 0000000000000000000000000000000000000000..6a30c3cdbdb955a2e0fd98b8f544e3cc48d4218b GIT binary patch literal 16672 zcmeHOZ)_Ar6rZC-E411gLR$qrPmIMBxRzoINUsEtg8ZpelSV?0?RDE**}FY=_llKN zFn+0tF(eoyF)@4-HAX%*kzV_es2>^XsG2axo2k=PPB(+8>aFq?K7&1 zgw>~;(UZC{5O*CQS}E5^81LwI9u3>_er@26Maml5uikxKDLQ43fOu4_5*6ZvZfj0- zP`4e!N&=@o5Rg65n$%bo)Ls@~6>uq%taB6kW_8gdYYb zT$8F0rTjr^jcVxo0|IP`6~+9)lr*dy82zy(S?P~8%hI({QBxlTA5rKj?$#gh`PEsk z#)#8Ul|tPGz+zUxX=iFf;B%5%LPsw&huwjnz>D1&m-2WdyTK{t*Ox=RxWAa&zVrPB zn%v%9hYv^%&>_IZI53RO8=IOohI9Iw)X2qD4-PC)|+p9A!xUP1ekskD0l(e()G)!bM-Y9Kr6 z#I+PkJ(^6LW@E7J7>wEH4oO-Fl-AtM&Dh;ZX|{t6`!&te33I4^It@{U+!`d zd6Am^MZ%3nE3tq@IyOA4+zULHs7uBZ(!ZFNgFBprCv?{~VtE7=0_uIy8651$(O^zM zE>O&KPK8#!EA6;eqW#Fh?v!n%3b%IFkL7Wd3ei42rVZr=ikU_#i58aCeZaJOH8VPs zfs{ebOry`Bd7^FW2@MGBvY-ZMSpuAE8fAtzZruvo4yhJ|qbF^{)ua8k^sX^fqbVE) 
z{+osiAzUJ5NQcC-3mf!;~JYN#77|1xgH5*X^9w9mYk+GY&64SNQzqb16qx=R1XuqS*OF zN2!XtK`x(bkH}F(wjcQUP3YXV!I_5_%d1pXfqb=*Rg0BG=XRiHL4WSYCGzw8S5beK z1FHq-f=g!wh<}_XKy|hE4nX#S!;2IJ+MEZtK=n}y zyg1)>cDovp+l^9vYR5t~`G5E0Jh2zAT>i9Jo_Owjjf9;R2a5ni#E3bs1Lvbg*DCx* z2*y(RQ}K*y$m>qs{9=h9;LV>>`cKqRUY%j!{3+XcCo>jUDt}X!^*5I(kIM3=s6PFq z8i4h>yg}8o?WjJzNzvIJ;%$ZP&QksIHbs9>{XN1 zJ3$iXYS=H3xl`-Xh4M%E)>T~{v~O9ScORFQxHMY?AY&&nBj5s}K>`t2rU;BL=W)ID zGVoQ{vL^U6<$8mnwnEgNCODVF3L$^lG9|ErBA~Ls#dWI58EVPJ+kEq%lpI&r(dZW& z=T?L9=nT!>ZmDr6TWWWF^5t~q=5Wnetyv!V`}FqA#9;i2)q5r*o?q9#W1>7#{le|B zuWKKROx*hDrR%TUxN^R2IMegq`(tZvbZ6H8Q62g1n;+ZH?Em81iG#7R$6x)az3TkB z@$%o-XL?Wmnz`M$I`ZlMwUL(3e#mTSTNPRT-JML&o+$m$Z+pM)Bj6+8Bj6+8Bj6)2e*|X6l`5hN z(DN$ClUy!5jtoAhwBw8e>pH?Wi=r%+|4Y&PYSy;Y^Y-XwH64yS7scd!CGHyn-sf%J z^>wt5SP}m2f*$MWt7qoUzvTSTlqY|8FY{KI!^+QF;kOpW@>W6hIf>{X`zU2c)h{Xx zd^?)|UU(3Ccug~w0hL@RI`3>i-)nqt26giVcg35iOYrWqb}M<*_9yVM;6rz6pLFM z=(~Y9X{*j#QT%s+i)vQBGWQz+?|I9+e$w-Pm(Rhje@Q`(TfOg-bKiH&{=T&)7@>Dd z9inBWXt_flF7xxNSG{6((nXnQJ{?NMrBe|lnep+CO?}zc**Q z=~y5 +#include +#include + +namespace fpga_tools { + +void exception_handler(sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; + } + } +} + +} // namespace fpga_tools + +#endif //__EXCEPTIONHANDLER_HPP__ diff --git a/experiments/dense_streaming/src/firmware/defines.h b/experiments/dense_streaming/src/firmware/defines.h new file mode 100644 index 0000000000..12033292c1 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/defines.h @@ -0,0 +1,33 @@ +#ifndef DEFINES_H_ +#define DEFINES_H_ + +#include +#include +#include +#include + +// Include nnet::array - a custom array-like struct, mainly used with io_stream +#include "nnet_utils/nnet_types.h" + +// hls-fpga-machine-learning insert numbers +#define N_INPUT_1_1 8 +#define N_LAYER_2 4 +#define N_LAYER_4 2 + +// 
hls-fpga-machine-learning insert layer-precision +typedef nnet::array, 8*1> input_t; +typedef ac_fixed<16,6,true> model_default_t; +typedef nnet::array, 4*1> fc1_result_t; +typedef nnet::array, 32*1> w2_t; +typedef nnet::array, 4*1> b2_t; +typedef ac_int<1, false> layer2_index; +typedef nnet::array, 2*1> result_t; +typedef nnet::array, 8*1> w4_t; +typedef nnet::array, 2*1> b4_t; +typedef ac_int<1, false> layer4_index; + +#define DIV_ROUNDUP(n, d) ((n + d - 1) / d) +#define MIN(n, d) (n > d ? d : n) +#define MAX(n, d) (n < d ? d : n) + +#endif diff --git a/experiments/dense_streaming/src/firmware/myproject.cpp b/experiments/dense_streaming/src/firmware/myproject.cpp new file mode 100644 index 0000000000..23e7963592 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/myproject.cpp @@ -0,0 +1,35 @@ +#include "myproject.h" +#include "parameters.h" +#include + +// hls-fpga-machine-learning insert weights +#include "weights/w2.h" +#include "weights/b2.h" +#include "weights/w4.h" +#include "weights/b4.h" + +// The inter-task pipes need to be declared in the global scope +// hls-fpga-machine-learning insert inter-task pipes +class Layer2OutPipeID; +using Layer2OutPipe = sycl::ext::intel::experimental::pipe; + +using sycl::ext::intel::experimental::task_sequence; + +void Myproject::operator()() const { + // **************************************** + // NETWORK INSTANTIATION + // **************************************** + + // hls-fpga-machine-learning read in + + // hls-fpga-machine-learning declare task sequences + task_sequence> fc1; + task_sequence> fc2; + + // hls-fpga-machine-learning insert layers + + fc1.async(w2, b2); + fc2.async(w4, b4); + + // hls-fpga-machine-learning return +} diff --git a/experiments/dense_streaming/src/firmware/myproject.h b/experiments/dense_streaming/src/firmware/myproject.h new file mode 100644 index 0000000000..1276f55eca --- /dev/null +++ b/experiments/dense_streaming/src/firmware/myproject.h @@ -0,0 +1,33 @@ +#ifndef 
MYPROJECT_H_ +#define MYPROJECT_H_ + +#include "defines.h" + +// This file defines the interface to the kernel + +// currently this is fixed +using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>)); + +// Need to declare the input and output pipes + +// hls-fpga-machine-learning insert inputs +class Fc1InputPipeID; +using Fc1InputPipe = sycl::ext::intel::experimental::pipe; +// hls-fpga-machine-learning insert outputs +class Layer4OutPipeID; +using Layer4OutPipe = sycl::ext::intel::experimental::pipe; + +class MyprojectID; + +struct Myproject { + + // kernel property method to config invocation interface + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{sycl::ext::intel::experimental::streaming_interface<>, + sycl::ext::intel::experimental::pipelined<>}; + } + + SYCL_EXTERNAL void operator()() const; +}; + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/elu_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/elu_table.tb new file mode 100644 index 0000000000..75c834fd93 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/elu_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t elu_table[1024] = {0.0, -0.007782061739756374, -0.015503562994591547, -0.023164975049937864, -0.03076676552365587, -0.0383093983945747, -0.04579333403081165, -0.05321902921787114, -0.06058693718652419, -0.06789750764047242, -0.07515118678379518, -0.08234841734818421, -0.08948963861996584, -0.09657528646691327, -0.10360579336484954, -0.11058158842404442, -0.11750309741540466, -0.12437074279646176, -0.13118494373715683, -0.1379461161454243, -0.14465467269257748, -0.15131102283849607, -0.15791557285661773, -0.16446872585873495, -0.17097088181959963, -0.17742243760133536, -0.18382378697766022, -0.19017532065792075, -0.19647742631093923, 
-0.2027304885886756, -0.20893488914970404, -0.21509100668250836, -0.22119921692859512, -0.22725989270542757, -0.23327340392917983, -0.23924011763731634, -0.24516039801099265, -0.25103460639728425, -0.2568631013312417, -0.262646238557773, -0.2683843710533581, -0.2740778490475917, -0.2797270200445602, -0.28533222884405174, -0.2908938175626016, -0.29641212565437247, -0.3018874899318742, -0.30732024458652074, -0.3127107212090279, -0.31805924880965186, -0.32336615383827105, -0.3286317602043105, -0.3338563892965122, -0.3390403600025511, -0.3441839887284984, -0.3492875894181341, -0.35435147357210794, -0.3593759502669527, -0.364361326173948, -0.3693079055778393, -0.37421599039540887, -0.3790858801939042, -0.38391787220932183, -0.3887122613645494, -0.3934693402873666, -0.39818939932830555, -0.4028727265783726, -0.4075196078866321, -0.4121303268776535, -0.41670516496882215, -0.42124440138751584, -0.4257483131881479, -0.430217175269077, -0.43465126038938584, -0.4390508391855291, -0.443416180187852, -0.4477475498369796, -0.45204521250008, -0.45630943048699957, -0.46054046406627314, -0.46473857148100983, -0.4689040089646548, -0.47303703075662906, -0.4771378891178464, -0.48120683434611067, -0.48524411479139296, -0.48924997687098926, -0.4932246650845613, -0.4971684220290591, -0.501081488413528, -0.5049641030738015, -0.5088165029870768, -0.5126389232863808, -0.5164315972749205, -0.5201947564403224, -0.5239286304687627, -0.5276334472589853, -0.5313094329362118, -0.5349568118659437, -0.538575806667656, -0.5421666382283857, -0.5457295257162127, -0.5492646865936376, -0.5527723366308543, -0.5562526899189202, -0.5597059588828219, -0.5631323542944427, -0.5665320852854254, -0.5699053593599377, -0.5732523824073371, -0.5765733587147372, -0.5798684909794769, -0.5831379803214916, -0.5863820262955893, -0.58960082690363, -0.592794578606611, -0.5959634763366579, -0.5991077135089227, -0.6022274820333884, -0.6053229723265829, -0.608394373323201, -0.6114418724876359, -0.6144656558254213, 
-0.6174659078945843, -0.6204428118169104, -0.6233965492891196, -0.6263273005939569, -0.6292352446111963, -0.6321205588285577, -0.634983419352541, -0.6378240009191742, -0.6406424769046792, -0.643439019336053, -0.6462137989015679, -0.6489669849611899, -0.6516987455569148, -0.6544092474230254, -0.6570986559962684, -0.6597671354259509, -0.6624148485839604, -0.665041957074705, -0.6676486212449777, -0.6702350001937425, -0.6728012517818456, -0.6753475326416503, -0.6778739981865972, -0.6803808026206901, -0.6828680989479072, -0.685336038981541, -0.687784773353463, -0.6902144515233184, -0.6926252217876483, -0.6950172312889407, -0.6973906260246121, -0.6997455508559176, -0.7020821495167935, -0.7044005646226292, -0.7067009376789731, -0.7089834090901681, -0.7112481181679218, -0.7134952031398099, -0.7157248011577122, -0.7179370483061845, -0.7201320796107638, -0.72231002904621, -0.7244710295446837, -0.7266152130038583, -0.7287427102949726, -0.7308536512708161, -0.7329481647736567, -0.7350263786431034, -0.7370884197239096, -0.739134413873715, -0.7411644859707276, -0.743178759921345, -0.745177358667718, -0.7471604041952535, -0.7491280175400612, -0.7510803187963395, -0.7530174271237069, -0.754939460754474, -0.7568465370008608, -0.7587387722621557, -0.7606162820318211, -0.7624791809045419, -0.7643275825832203, -0.7661615998859153, -0.7679813447527291, -0.7697869282526385, -0.7715784605902755, -0.7733560511126522, -0.7751198083158357, -0.7768698398515702, -0.7786062525338471, -0.7803291523454247, -0.7820386444442973, -0.7837348331701127, -0.7854178220505417, -0.7870877138075958, -0.7887446103638978, -0.7903886128489022, -0.792019821605068, -0.7936383361939829, -0.7952442554024403, -0.7968376772484682, -0.798418698987313, -0.7999874171173742, -0.8015439273860949, -0.8030883247958059, -0.8046207036095239, -0.8061411573567048, -0.8076497788389526, -0.8091466601356837, -0.8106318926097464, -0.8121055669129983, -0.8135677729918379, -0.8150186000926957, -0.8164581367674809, 
-0.8178864708789864, -0.819303689606251, -0.8207098794498814, -0.8221051262373308, -0.8234895151281381, -0.8248631306191252, -0.8262260565495549, -0.8275783761062472, -0.8289201718286574, -0.8302515256139138, -0.8315725187218157, -0.8328832317797943, -0.8341837447878325, -0.8354741371233488, -0.8367544875460415, -0.8380248742026966, -0.839285374631956, -0.8405360657690519, -0.8417770239505016, -0.8430083249187668, -0.8442300438268767, -0.8454422552430152, -0.8466450331550716, -0.8478384509751571, -0.8490225815440854, -0.8501974971358184, -0.8513632694618775, -0.8525199696757207, -0.8536676683770854, -0.8548064356162974, -0.8559363408985468, -0.8570574531881294, -0.8581698409126575, -0.8592735719672346, -0.860368713718601, -0.8614553330092445, -0.8625334961614806, -0.8636032689815004, -0.8646647167633873, -0.865717904293102, -0.866762895852437, -0.8677997552229397, -0.8688285456898057, -0.8698493300457413, -0.870862170594796, -0.8718671291561655, -0.8728642670679644, -0.8738536451909708, -0.8748353239123399, -0.8758093631492906, -0.8767758223527625, -0.877734760511044, -0.8786862361533732, -0.8796303073535099, -0.8805670317332803, -0.8814964664660944, -0.8824186682804345, -0.8833336934633189, -0.8842415978637368, -0.8851424368960568, -0.8860362655434096, -0.886923138361044, -0.8878031094796562, -0.8886762326086943, -0.8895425610396357, -0.8904021476492406, -0.891255044902779, -0.8921013048572326, -0.8929409791644728, -0.8937741190744127, -0.8946007754381357, -0.8954209987109986, -0.8962348389557117, -0.897042345845394, -0.8978435686666058, -0.8986385563223553, -0.8994273573350856, -0.9002100198496344, -0.9009865916361737, -0.9017571200931247, -0.9025216522500515, -0.9032802347705311, -0.9040329139550015, -0.9047797357435883, -0.9055207457189076, -0.9062559891088492, -0.9069855107893365, -0.9077093552870658, -0.9084275667822238, -0.9091401891111842, -0.9098472657691836, -0.9105488399129756, -0.9112449543634658, -0.9119356516083241, -0.9126209738045796, 
-0.9133009627811922, -0.9139756600416064, -0.9146451067662844, -0.9153093438152196, -0.91596841173043, -0.9166223507384336, -0.9172712007527029, -0.9179150013761012, -0.9185537919033002, -0.9191876113231778, -0.919816498321198, -0.9204404912817723, -0.9210596282906023, -0.9216739471370041, -0.9222834853162154, -0.9228882800316833, -0.9234883681973356, -0.9240837864398334, -0.9246745711008072, -0.9252607582390743, -0.9258423836328404, -0.9264194827818828, -0.9269920909097175, -0.9275602429657486, -0.9281239736274021, -0.928683317302242, -0.9292383081300706, -0.929788979985012, -0.9303353664775799, -0.9308775009567288, -0.9314154165118899, -0.9319491459749898, -0.9324787219224556, -0.9330041766772024, -0.9335255423106061, -0.9340428506444618, -0.934556133252925, -0.9350654214644392, -0.9355707463636479, -0.9360721387932924, -0.9365696293560937, -0.9370632484166201, -0.9375530261031417, -0.938038992309468, -0.9385211766967733, -0.9389996086954066, -0.9394743175066882, -0.939945332104692, -0.9404126812380139, -0.9408763934315263, -0.9413364969881192, -0.9417930199904281, -0.9422459903025473, -0.9426954355717314, -0.943141383230082, -0.9435838604962227, -0.9440228943769597, -0.9444585116689311, -0.9448907389602414, -0.9453196026320853, -0.9457451288603568, -0.946167343617248, -0.9465862726728337, -0.9470019415966442, -0.9474143757592264, -0.9478236003336916, -0.9482296402972531, -0.9486325204327493, -0.9490322653301573, -0.9494288993880935, -0.9498224468153028, -0.950212931632136, -0.9506003776720162, -0.9509848085828932, -0.9513662478286872, -0.9517447186907204, -0.9521202442691384, -0.9524928474843205, -0.9528625510782776, -0.953229377616041, -0.9535933494870398, -0.953954488906467, -0.9543128179166355, -0.9546683583883238, -0.9550211320221103, -0.9553711603496986, -0.9557184647352309, -0.9560630663765926, -0.9564049863067056, -0.9567442453948123, -0.9570808643477496, -0.9574148637112124, -0.9577462638710077, -0.9580750850542988, -0.9584013473308404, 
-0.9587250706142024, -0.9590462746629869, -0.9593649790820324, -0.9596812033236124, -0.9599949666886207, -0.9603062883277509, -0.9606151872426645, -0.9609216822871508, -0.961225792168278, -0.9615275354475346, -0.9618269305419624, -0.962123995725281, -0.962418749129002, -0.9627112087435374, -0.9630013924192962, -0.9632893178677742, -0.9635750026626357, -0.9638584642407856, -0.9641397199034334, -0.9644187868171499, -0.9646956820149143, -0.9649704223971541, -0.9652430247327771, -0.9655135056601938, -0.965781881688334, -0.9660481691976537, -0.9663123844411353, -0.9665745435452795, -0.9668346625110897, -0.9670927572150483, -0.9673488434100859, -0.9676029367265431, -0.9678550526731239, -0.9681052066378429, -0.968353413888964, -0.9685996895759326, -0.9688440487303002, -0.9690865062666418, -0.9693270769834664, -0.9695657755641198, -0.9698026165776815, -0.970037614479853, -0.9702707836138412, -0.9705021382112329, -0.9707316923928639, -0.9709594601696808, -0.9711854554435962, -0.9714096920083372, -0.9716321835502869, -0.9718529436493206, -0.972071985779634, -0.9722893233105657, -0.9725049695074133, -0.9727189375322431, -0.9729312404446934, -0.9731418912027714, -0.9733509026636445, -0.9735582875844249, -0.9737640586229477, -0.9739682283385446, -0.9741708091928093, -0.9743718135503587, -0.9745712536795879, -0.9747691417534179, -0.9749654898500398, -0.9751603099536514, -0.9753536139551885, -0.9755454136530511, -0.9757357207538233, -0.9759245468729877, -0.9761119035356347, -0.9762978021771657, -0.9764822541439909, -0.9766652706942223, -0.9768468629983603, -0.9770270421399762, -0.9772058191163877, -0.977383204839331, -0.9775592101356269, -0.9777338457478403, -0.9779071223349376, -0.978079050472936, -0.9782496406555496, -0.9784189032948301, -0.9785868487218014, -0.978753487187091, -0.9789188288615556, -0.9790828838369013, -0.9792456621263003, -0.9794071736650012, -0.9795674283109362, -0.9797264358453225, -0.9798842059732591, -0.9800407483243194, -0.9801960724531389, 
-0.9803501878399983, -0.980503103891402, -0.9806548299406528, -0.9808053752484207, -0.9809547490033089, -0.9811029603224143, -0.981250018251884, -0.9813959317674671, -0.9815407097750632, -0.9816843611112658, -0.981826894543901, -0.9819683187725635, -0.982108642429147, -0.9822478740783714, -0.9823860222183055, -0.9825230952808853, -0.9826591016324293, -0.9827940495741486, -0.9829279473426538, -0.9830608031104576, -0.983192624986474, -0.9833234210165125, -0.9834531991837698, -0.9835819674093172, -0.9837097335525833, -0.9838365054118341, -0.9839622907246495, -0.9840870971683945, -0.9842109323606888, -0.9843338038598711, -0.9844557191654607, -0.9845766857186152, -0.9846967109025846, -0.9848158020431621, -0.9849339664091309, -0.9850512112127082, -0.9851675436099855, -0.9852829707013648, -0.9853974995319926, -0.9855111370921895, -0.985623890317877, -0.9857357660910008, -0.9858467712399509, -0.9859569125399785, -0.9860661967136092, -0.9861746304310535, -0.986282220310614, -0.9863889729190892, -0.9864948947721743, -0.9865999923348592, -0.9867042720218225, -0.9868077401978237, -0.9869104031780911, -0.9870122672287077, -0.9871133385669935, -0.9872136233618845, -0.98731312773431, -0.987411857757566, -0.9875098194576855, -0.9876070188138064, -0.9877034617585371, -0.9877991541783174, -0.987894101913779, -0.9879883107601012, -0.9880817864673648, -0.9881745347409033, -0.988266561241651, -0.9883578715864881, -0.9884484713485843, -0.9885383660577381, -0.9886275612007153, -0.9887160622215829, -0.9888038745220422, -0.9888910034617577, -0.988977454358685, -0.989063232489395, -0.9891483430893959, -0.9892327913534529, -0.9893165824359051, -0.9893997214509805, -0.9894822134731076, -0.9895640635372255, -0.989645276639091, -0.9897258577355836, -0.9898058117450084, -0.9898851435473954, -0.9899638579847984, -0.9900419598615896, -0.9901194539447538, -0.9901963449641782, -0.9902726376129423, -0.9903483365476032, -0.9904234463884812, -0.9904979717199401, -0.9905719170906685, 
-0.9906452870139564, -0.9907180859679715, -0.9907903183960318, -0.9908619887068775, -0.9909331012749392, -0.9910036604406056, -0.9910736705104887, -0.9911431357576854, -0.9912120604220398, -0.9912804487104008, -0.9913483047968794, -0.9914156328231036, -0.9914824368984709, -0.9915487211003994, -0.9916144894745759, -0.9916797460352041, -0.9917444947652486, -0.9918087396166784, -0.9918724845107078, -0.9919357333380363, -0.9919984899590856, -0.9920607582042352, -0.9921225418740567, -0.9921838447395451, -0.9922446705423495, -0.9923050229950011, -0.99236490578114, -0.9924243225557401, -0.9924832769453318, -0.9925417725482238, -0.9925998129347228, -0.9926574016473505, -0.992714542201061, -0.9927712380834548, -0.9928274927549913, -0.9928833096492008, -0.9929386921728934, -0.9929936437063674, -0.9930481676036153, -0.9931022671925284, -0.9931559457751004, -0.9932092066276288, -0.9932620530009145, -0.9933144881204606, -0.9933665151866691, -0.9934181373750361, -0.9934693578363463, -0.993520179696864, -0.9935706060585255, -0.9936206399991273, -0.9936702845725143, -0.9937195428087661, -0.9937684177143821, -0.9938169122724652, -0.9938650294429032, -0.9939127721625501, -0.9939601433454051, -0.9940071458827904, -0.994053782643528, -0.994100056474114, -0.9941459701988935, -0.9941915266202321, -0.9942367285186872, -0.9942815786531777, -0.9943260797611526, -0.9943702345587578, -0.9944140457410019, -0.9944575159819209, -0.9945006479347411, -0.9945434442320417, -0.9945859074859144, -0.9946280402881237, -0.994669845210265, -0.9947113248039212, -0.9947524816008186, -0.9947933181129816, -0.9948338368328856, -0.9948740402336097, -0.9949139307689873, -0.9949535108737557, -0.9949927829637052, -0.995031749435826, -0.995070412668455, -0.9951087750214206, -0.9951468388361869, -0.9951846064359967, -0.995222080126013, -0.99525926219346, -0.9952961549077625, -0.9953327605206845, -0.9953690812664667, -0.9954051193619629, -0.995440877006775, -0.9954763563833874, -0.9955115596573004, 
-0.9955464889771626, -0.995581146474901, -0.9956155342658523, -0.9956496544488912, -0.9956835091065592, -0.9957171003051911, -0.9957504300950413, -0.9957835005104091, -0.9958163135697627, -0.9958488712758626, -0.9958811756158835, -0.995913228561536, -0.9959450320691864, -0.9959765880799767, -0.9960078985199428, -0.9960389653001319, -0.9960697903167196, -0.9961003754511251, -0.9961307225701264, -0.9961608335259738, -0.996190710156504, -0.9962203542852506, -0.9962497677215572, -0.9962789522606865, -0.9963079096839303, -0.9963366417587188, -0.9963651502387273, -0.9963934368639843, -0.9964215033609771, -0.9964493514427575, -0.9964769828090461, -0.9965043991463364, -0.9965316021279971, -0.996558593414375, -0.996585374652896, -0.9966119474781653, -0.9966383135120678, -0.9966644743638665, -0.9966904316303015, -0.9967161868956864, -0.996741741732006, -0.9967670976990116, -0.9967922563443165, -0.9968172192034903, -0.9968419878001529, -0.996866563646067, -0.9968909482412309, -0.9969151430739698, -0.9969391496210265, -0.9969629693476517, -0.9969866037076939, -0.9970100541436869, -0.9970333220869393, -0.9970564089576207, -0.9970793161648491, -0.9971020451067766, -0.9971245971706747, -0.9971469737330191, -0.9971691761595739, -0.9971912058054745, -0.9972130640153105, -0.9972347521232081, -0.9972562714529111, -0.9972776233178616, -0.997298809021281, -0.9973198298562482, -0.9973406871057799, -0.997361382042908, -0.9973819159307578, -0.9974022900226245, -0.9974225055620505, -0.9974425637829004, -0.9974624659094374, -0.9974822131563968, -0.9975018067290613, -0.9975212478233336, -0.99754053762581, -0.9975596773138525, -0.9975786680556611, -0.9975975110103444, -0.997616207327991, -0.9976347581497393, -0.9976531646078475, -0.9976714278257622, -0.9976895489181877, -0.997707528991153, -0.9977253691420804, -0.997743070459852, -0.9977606340248757, -0.997778060909152, -0.997795352176339, -0.9978125088818172, -0.9978295320727539, -0.9978464227881679, -0.9978631820589918, -0.9978798109081354, 
-0.9978963103505482, -0.9979126813932815, -0.9979289250355495, -0.9979450422687905, -0.9979610340767274, -0.9979769014354276, -0.9979926453133627, -0.9980082666714677, -0.9980237664631995, -0.9980391456345951, -0.9980544051243293, -0.9980695458637723, -0.9980845687770462, -0.9980994747810815, -0.998114264785673, -0.9981289396935358, -0.9981435004003595, -0.9981579477948638, -0.998172282758852, -0.9981865061672653, -0.9982006188882364, -0.9982146217831415, -0.9982285157066539, -0.9982423015067957, -0.9982559800249897, -0.9982695520961106, -0.9982830185485361, -0.9982963802041974, -0.9983096378786295, -0.9983227923810205, -0.9983358445142618, -0.9983487950749963, -0.9983616448536677, -0.9983743946345681, -0.9983870451958864, -0.9983995973097555, -0.9984120517422994, -0.9984244092536804, -0.9984366705981448, -0.9984488365240696, -0.9984609077740078, -0.9984728850847336, -0.9984847691872879, -0.9984965608070224, -0.9985082606636442, -0.9985198694712593, -0.9985313879384169, -0.9985428167681518, -0.9985541566580283, -0.9985654083001815, -0.9985765723813608, -0.9985876495829711, -0.9985986405811146, -0.998609546046632, -0.9986203666451433, -0.9986311030370891, -0.9986417558777698, -0.9986523258173867, -0.9986628135010809, -0.998673219568973, -0.9986835446562023, -0.9986937893929654, -0.9987039544045545, -0.9987140403113958, -0.9987240477290873, -0.9987339772684366, -0.9987438295354976, -0.9987536051316079, -0.9987633046534259, -0.9987729286929662, -0.9987824778376366, -0.9987919526702737, -0.9988013537691781, -0.9988106817081506, -0.9988199370565259, -0.9988291203792088, -0.9988382322367076, -0.9988472731851689, -0.9988562437764109, -0.9988651445579582, -0.9988739760730738, -0.9988827388607936, -0.9988914334559583, -0.9989000603892467, -0.9989086201872076, -0.9989171133722923, -0.9989255404628862, -0.9989339019733409, -0.998942198414005, -0.9989504302912556, -0.9989585981075293, -0.9989667023613523, -0.9989747435473718, -0.9989827221563853, -0.9989906386753707, 
-0.9989984935875168, -0.9990062873722515, -0.9990140205052722, -0.9990216934585743, -0.9990293067004801, -0.9990368606956673, -0.9990443559051976, -0.9990517927865447, -0.9990591717936218, -0.9990664933768103, -0.9990737579829865, -0.9990809660555489, -0.9990881180344455, -0.9990952143562007, -0.999102255453942, -0.999109241757426, -0.9991161736930649, -0.9991230516839528, -0.9991298761498908, -0.9991366475074136, -0.999143366169814, -0.9991500325471689, -0.9991566470463636, -0.9991632100711173, -0.999169722022007, -0.9991761832964929, -0.9991825942889418, -0.9991889553906518, -0.9991952669898754, -0.9992015294718442, -0.9992077432187917, -0.9992139086099768, -0.999220026021707, -0.9992260958273615, -0.9992321183974137, -0.9992380940994537, -0.9992440232982117, -0.9992499063555789, -0.9992557436306303, -0.999261535479647, -0.999267282256137, -0.9992729843108575, -0.9992786419918362, -0.9992842556443923, -0.9992898256111574, -0.9992953522320974, -0.999300835844532, -0.9993062767831561, -0.9993116753800598, -0.9993170319647492, -0.9993223468641659, -0.9993276204027071, -0.9993328529022457, -0.9993380446821499, -0.9993431960593024, -0.9993483073481199, -0.9993533788605721, -0.9993584109062014, -0.9993634037921408, -0.9993683578231337, -0.9993732733015516, -0.9993781505274129, -0.9993829897984013, -0.9993877914098842, -0.9993925556549301, -0.9993972828243269, -0.9994019732065996, -0.9994066270880279, -0.9994112447526635, -0.999415826482348, -0.999420372556729, -0.9994248832532786, -0.9994293588473092, -0.9994337996119906, -0.9994382058183672, -0.9994425777353737, -0.9994469156298522, -0.9994512197665679, -0.9994554904082261, -0.9994597278154872, -0.9994639322469833, -0.9994681039593339, -0.9994722432071615, -0.999476350243107, -0.9994804253178452, -0.9994844686801001, -0.9994884805766604, -0.9994924612523939, -0.9994964109502631, -0.9995003299113395, -0.9995042183748191, -0.9995080765780356, -0.9995119047564766, -0.9995157031437966, -0.9995194719718319, 
-0.9995232114706148, -0.9995269218683873, -0.9995306033916153, -0.9995342562650021, -0.9995378807115027, -0.999541476952337, -0.999545045207003, -0.9995485856932909, -0.9995520986272959, -0.9995555842234316, -0.999559042694443, -0.9995624742514194, -0.9995658791038077, -0.9995692574594243, -0.999572609524469, -0.9995759355035366, -0.9995792355996297, -0.9995825100141713, -0.9995857589470167, -0.9995889825964662, -0.9995921811592766, -0.9995953548306737, -0.9995985038043641, -0.9996016282725468, -0.9996047284259253, -0.9996078044537187, -0.9996108565436739, -0.9996138848820767, -0.9996168896537629, -0.9996198710421306, -0.9996228292291498, -0.999625764395375, -0.9996286767199554, -0.9996315663806461, -0.9996344335538189, -0.999637278414473, -0.9996401011362459, -0.9996429018914238, -0.9996456808509518, -0.9996484381844453, -0.9996511740601993, -0.9996538886451992, -0.9996565821051312, -0.9996592546043915, -0.9996619063060977}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table.tb new file mode 100644 index 0000000000..705e851f83 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t exp_table[1024] = {1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 
1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 4e-12, 4e-12, 4e-12, 4e-12, 4e-12, 4e-12, 4e-12, 4e-12, 5e-12, 5e-12, 5e-12, 5e-12, 5e-12, 5e-12, 5e-12, 6e-12, 6e-12, 6e-12, 6e-12, 6e-12, 7e-12, 7e-12, 7e-12, 7e-12, 7e-12, 8e-12, 8e-12, 8e-12, 8e-12, 9e-12, 9e-12, 9e-12, 1e-11, 1e-11, 1e-11, 1e-11, 1.1e-11, 1.1e-11, 1.2e-11, 1.2e-11, 1.2e-11, 1.3e-11, 1.3e-11, 1.3e-11, 1.4e-11, 1.4e-11, 1.5e-11, 1.5e-11, 1.6e-11, 1.6e-11, 1.7e-11, 1.7e-11, 1.8e-11, 1.8e-11, 1.9e-11, 2e-11, 2e-11, 2.1e-11, 2.2e-11, 2.2e-11, 2.3e-11, 2.4e-11, 2.4e-11, 2.5e-11, 2.6e-11, 2.7e-11, 2.8e-11, 2.8e-11, 2.9e-11, 3e-11, 3.1e-11, 3.2e-11, 3.3e-11, 3.4e-11, 3.5e-11, 3.7e-11, 3.8e-11, 3.9e-11, 4e-11, 4.1e-11, 4.3e-11, 4.4e-11, 4.6e-11, 4.7e-11, 4.8e-11, 5e-11, 5.2e-11, 5.3e-11, 5.5e-11, 5.7e-11, 5.8e-11, 6e-11, 6.2e-11, 6.4e-11, 6.6e-11, 6.8e-11, 7.1e-11, 7.3e-11, 7.5e-11, 7.7e-11, 8e-11, 8.2e-11, 8.5e-11, 8.8e-11, 9.1e-11, 9.3e-11, 9.6e-11, 9.9e-11, 1.03e-10, 1.06e-10, 1.09e-10, 1.13e-10, 1.16e-10, 1.2e-10, 1.24e-10, 1.28e-10, 1.32e-10, 1.36e-10, 1.4e-10, 1.45e-10, 1.49e-10, 1.54e-10, 1.59e-10, 1.64e-10, 1.69e-10, 1.75e-10, 1.8e-10, 1.86e-10, 1.92e-10, 1.98e-10, 2.04e-10, 2.11e-10, 2.17e-10, 2.24e-10, 2.31e-10, 2.39e-10, 2.46e-10, 2.54e-10, 2.62e-10, 2.7e-10, 2.79e-10, 2.88e-10, 2.97e-10, 3.06e-10, 3.16e-10, 3.26e-10, 3.36e-10, 3.47e-10, 3.58e-10, 3.7e-10, 3.81e-10, 3.93e-10, 4.06e-10, 4.19e-10, 4.32e-10, 4.46e-10, 4.6e-10, 4.75e-10, 4.9e-10, 5.05e-10, 5.21e-10, 5.38e-10, 5.55e-10, 5.72e-10, 5.91e-10, 6.09e-10, 6.29e-10, 6.49e-10, 6.69e-10, 6.9e-10, 7.12e-10, 7.35e-10, 7.58e-10, 7.82e-10, 8.07e-10, 8.33e-10, 8.59e-10, 8.86e-10, 9.15e-10, 9.44e-10, 9.74e-10, 1.005e-09, 
1.036e-09, 1.069e-09, 1.103e-09, 1.138e-09, 1.174e-09, 1.212e-09, 1.25e-09, 1.29e-09, 1.331e-09, 1.373e-09, 1.417e-09, 1.462e-09, 1.508e-09, 1.556e-09, 1.605e-09, 1.656e-09, 1.709e-09, 1.763e-09, 1.819e-09, 1.877e-09, 1.936e-09, 1.998e-09, 2.061e-09, 2.127e-09, 2.194e-09, 2.264e-09, 2.336e-09, 2.41e-09, 2.486e-09, 2.565e-09, 2.647e-09, 2.731e-09, 2.817e-09, 2.907e-09, 2.999e-09, 3.094e-09, 3.192e-09, 3.294e-09, 3.398e-09, 3.506e-09, 3.617e-09, 3.732e-09, 3.851e-09, 3.973e-09, 4.099e-09, 4.229e-09, 4.363e-09, 4.502e-09, 4.645e-09, 4.792e-09, 4.944e-09, 5.101e-09, 5.263e-09, 5.43e-09, 5.603e-09, 5.781e-09, 5.964e-09, 6.153e-09, 6.349e-09, 6.55e-09, 6.758e-09, 6.973e-09, 7.194e-09, 7.422e-09, 7.658e-09, 7.901e-09, 8.152e-09, 8.411e-09, 8.678e-09, 8.953e-09, 9.237e-09, 9.531e-09, 9.833e-09, 1.0145e-08, 1.0467e-08, 1.08e-08, 1.1142e-08, 1.1496e-08, 1.1861e-08, 1.2238e-08, 1.2626e-08, 1.3027e-08, 1.344e-08, 1.3867e-08, 1.4307e-08, 1.4761e-08, 1.523e-08, 1.5713e-08, 1.6212e-08, 1.6727e-08, 1.7258e-08, 1.7806e-08, 1.8371e-08, 1.8954e-08, 1.9556e-08, 2.0176e-08, 2.0817e-08, 2.1478e-08, 2.2159e-08, 2.2863e-08, 2.3589e-08, 2.4337e-08, 2.511e-08, 2.5907e-08, 2.6729e-08, 2.7578e-08, 2.8453e-08, 2.9357e-08, 3.0288e-08, 3.125e-08, 3.2242e-08, 3.3265e-08, 3.4321e-08, 3.5411e-08, 3.6535e-08, 3.7695e-08, 3.8891e-08, 4.0126e-08, 4.1399e-08, 4.2714e-08, 4.4069e-08, 4.5468e-08, 4.6912e-08, 4.8401e-08, 4.9937e-08, 5.1522e-08, 5.3158e-08, 5.4845e-08, 5.6586e-08, 5.8382e-08, 6.0236e-08, 6.2148e-08, 6.4121e-08, 6.6156e-08, 6.8256e-08, 7.0423e-08, 7.2658e-08, 7.4965e-08, 7.7344e-08, 7.9799e-08, 8.2332e-08, 8.4946e-08, 8.7642e-08, 9.0425e-08, 9.3295e-08, 9.6256e-08, 9.9312e-08, 1.02464e-07, 1.05717e-07, 1.09073e-07, 1.12535e-07, 1.16107e-07, 1.19793e-07, 1.23596e-07, 1.27519e-07, 1.31567e-07, 1.35743e-07, 1.40052e-07, 1.44498e-07, 1.49085e-07, 1.53817e-07, 1.587e-07, 1.63738e-07, 1.68935e-07, 1.74298e-07, 1.79831e-07, 1.85539e-07, 1.91429e-07, 1.97505e-07, 2.03775e-07, 2.10243e-07, 
2.16917e-07, 2.23803e-07, 2.30907e-07, 2.38237e-07, 2.45799e-07, 2.53602e-07, 2.61652e-07, 2.69958e-07, 2.78527e-07, 2.87369e-07, 2.96491e-07, 3.05902e-07, 3.15613e-07, 3.25631e-07, 3.35968e-07, 3.46633e-07, 3.57636e-07, 3.68989e-07, 3.80702e-07, 3.92786e-07, 4.05255e-07, 4.18119e-07, 4.31391e-07, 4.45085e-07, 4.59214e-07, 4.73791e-07, 4.88831e-07, 5.04348e-07, 5.20357e-07, 5.36875e-07, 5.53918e-07, 5.71501e-07, 5.89642e-07, 6.08359e-07, 6.27671e-07, 6.47595e-07, 6.68152e-07, 6.89362e-07, 7.11244e-07, 7.33822e-07, 7.57116e-07, 7.81149e-07, 8.05945e-07, 8.31529e-07, 8.57924e-07, 8.85158e-07, 9.13256e-07, 9.42245e-07, 9.72156e-07, 1.003015e-06, 1.034854e-06, 1.067704e-06, 1.101597e-06, 1.136565e-06, 1.172643e-06, 1.209867e-06, 1.248272e-06, 1.287897e-06, 1.328779e-06, 1.370959e-06, 1.414478e-06, 1.459378e-06, 1.505704e-06, 1.5535e-06, 1.602814e-06, 1.653692e-06, 1.706186e-06, 1.760346e-06, 1.816226e-06, 1.873879e-06, 1.933362e-06, 1.994734e-06, 2.058053e-06, 2.123383e-06, 2.190786e-06, 2.260329e-06, 2.33208e-06, 2.406108e-06, 2.482486e-06, 2.561289e-06, 2.642593e-06, 2.726478e-06, 2.813025e-06, 2.90232e-06, 2.99445e-06, 3.089504e-06, 3.187575e-06, 3.28876e-06, 3.393156e-06, 3.500867e-06, 3.611996e-06, 3.726653e-06, 3.84495e-06, 3.967002e-06, 4.092928e-06, 4.222851e-06, 4.356899e-06, 4.495202e-06, 4.637895e-06, 4.785117e-06, 4.937013e-06, 5.093731e-06, 5.255423e-06, 5.422248e-06, 5.594369e-06, 5.771953e-06, 5.955175e-06, 6.144212e-06, 6.339251e-06, 6.54048e-06, 6.748097e-06, 6.962305e-06, 7.183312e-06, 7.411335e-06, 7.646596e-06, 7.889325e-06, 8.139759e-06, 8.398143e-06, 8.664728e-06, 8.939776e-06, 9.223555e-06, 9.516342e-06, 9.818423e-06, 1.0130094e-05, 1.0451657e-05, 1.0783429e-05, 1.1125731e-05, 1.14789e-05, 1.1843279e-05, 1.2219225e-05, 1.2607105e-05, 1.3007298e-05, 1.3420194e-05, 1.3846196e-05, 1.4285722e-05, 1.4739199e-05, 1.5207072e-05, 1.5689796e-05, 1.6187843e-05, 1.6701701e-05, 1.723187e-05, 1.7778868e-05, 1.834323e-05, 1.8925506e-05, 1.9526266e-05, 
2.0146097e-05, 2.0785602e-05, 2.1445408e-05, 2.2126159e-05, 2.2828518e-05, 2.3553173e-05, 2.4300831e-05, 2.5072222e-05, 2.58681e-05, 2.6689242e-05, 2.7536449e-05, 2.841055e-05, 2.9312398e-05, 3.0242873e-05, 3.1202885e-05, 3.2193371e-05, 3.3215298e-05, 3.4269665e-05, 3.5357501e-05, 3.6479868e-05, 3.7637864e-05, 3.8832618e-05, 4.0065297e-05, 4.1337106e-05, 4.2649287e-05, 4.4003121e-05, 4.539993e-05, 4.6841078e-05, 4.8327974e-05, 4.9862068e-05, 5.144486e-05, 5.3077895e-05, 5.4762769e-05, 5.6501125e-05, 5.8294664e-05, 6.0145135e-05, 6.2054347e-05, 6.4024163e-05, 6.6056508e-05, 6.8153367e-05, 7.0316787e-05, 7.2548881e-05, 7.485183e-05, 7.7227882e-05, 7.9679358e-05, 8.2208652e-05, 8.4818235e-05, 8.7510655e-05, 9.0288541e-05, 9.3154607e-05, 9.6111652e-05, 9.9162563e-05, 0.000102310321, 0.000105557999, 0.00010890877, 0.000112365905, 0.000115932782, 0.000119612884, 0.000123409804, 0.000127327252, 0.000131369053, 0.000135539154, 0.000139841629, 0.000144280678, 0.000148860639, 0.000153585983, 0.000158461325, 0.000163491428, 0.000168681203, 0.000174035719, 0.000179560205, 0.000185260058, 0.000191140844, 0.000197208305, 0.000203468369, 0.000209927148, 0.000216590951, 0.000223466286, 0.000230559868, 0.000237878623, 0.000245429702, 0.000253220477, 0.000261258557, 0.000269551794, 0.000278108287, 0.000286936391, 0.00029604473, 0.000305442199, 0.000315137975, 0.000325141528, 0.000335462628, 0.000346111355, 0.000357098109, 0.000368433619, 0.000380128958, 0.000392195546, 0.000404645169, 0.000417489986, 0.000430742541, 0.000444415777, 0.000458523048, 0.000473078132, 0.000488095244, 0.00050358905, 0.000519574682, 0.000536067753, 0.00055308437, 0.000570641153, 0.000588755247, 0.000607444345, 0.000626726698, 0.000646621139, 0.000667147098, 0.00068832462, 0.000710174389, 0.000732717744, 0.000755976702, 0.000779973978, 0.00080473301, 0.000830277978, 0.00085663383, 0.000883826307, 0.000911881966, 0.000940828206, 0.0009706933, 0.001001506412, 0.001033297639, 0.001066098027, 0.001099939611, 
0.001134855442, 0.001170879621, 0.00120804733, 0.001246394868, 0.001285959689, 0.001326780431, 0.001368896963, 0.001412350417, 0.001457183232, 0.001503439193, 0.001551163476, 0.00160040269, 0.001651204925, 0.001703619796, 0.001757698493, 0.001813493833, 0.001871060306, 0.001930454136, 0.001991733329, 0.002054957731, 0.002120189092, 0.002187491118, 0.00225692954, 0.002328572174, 0.00240248899, 0.002478752177, 0.002557436217, 0.002638617957, 0.002722376682, 0.002808794195, 0.002897954893, 0.002989945856, 0.003084856926, 0.003182780797, 0.003283813104, 0.003388052522, 0.003495600854, 0.003606563136, 0.003721047739, 0.003839166474, 0.0039610347, 0.004086771438, 0.00421649949, 0.004350345551, 0.004488440343, 0.004630918734, 0.004777919874, 0.004929587332, 0.005086069231, 0.005247518399, 0.005414092514, 0.005585954259, 0.005763271481, 0.005946217356, 0.006134970557, 0.006329715427, 0.006530642164, 0.006737946999, 0.006951832396, 0.007172507245, 0.007400187065, 0.007635094219, 0.007877458126, 0.008127515489, 0.008385510525, 0.008651695203, 0.00892632949, 0.009209681604, 0.00950202828, 0.009803655036, 0.010114856453, 0.010435936463, 0.010767208647, 0.011108996538, 0.011461633942, 0.011825465259, 0.012200845822, 0.012588142242, 0.012987732771, 0.013400007665, 0.013825369569, 0.014264233909, 0.014717029299, 0.015184197957, 0.01566619614, 0.016163494588, 0.016676578983, 0.017205950426, 0.017752125922, 0.018315638889, 0.018897039678, 0.019496896109, 0.020115794027, 0.020754337874, 0.021413151278, 0.022092877665, 0.022794180884, 0.023517745856, 0.024264279246, 0.02503451015, 0.025829190807, 0.026649097336, 0.027495030493, 0.02836781645, 0.029268307607, 0.030197383422, 0.03115595127, 0.032144947327, 0.033165337489, 0.034218118312, 0.035304317985, 0.036424997337, 0.037581250871, 0.038774207832, 0.040005033311, 0.041274929386, 0.042585136289, 0.043936933623, 0.045331641612, 0.046770622384, 0.048255281309, 0.049787068368, 0.051367479567, 0.052998058403, 0.054680397368, 
0.056416139504, 0.05820698001, 0.060054667895, 0.061961007691, 0.063927861207, 0.065957149356, 0.068050854025, 0.070211020015, 0.072439757034, 0.074739241761, 0.077111719968, 0.079559508718, 0.082084998624, 0.084690656185, 0.087379026195, 0.090152734231, 0.093014489211, 0.095967086045, 0.099013408364, 0.102156431333, 0.105399224562, 0.108744955097, 0.11219689052, 0.115758402136, 0.119432968267, 0.123224177647, 0.127135732932, 0.13117145431, 0.135335283237, 0.139631286281, 0.144063659101, 0.148636730538, 0.153354966845, 0.158222976049, 0.163245512454, 0.168427481278, 0.17377394345, 0.17929012055, 0.184981399907, 0.190853339864, 0.196911675204, 0.203162322752, 0.209611387151, 0.21626516683, 0.223130160148, 0.230213071747, 0.237520819095, 0.245060539246, 0.252839595805, 0.260865586126, 0.269146348729, 0.277689970954, 0.28650479686, 0.295599435377, 0.304982768711, 0.314663961018, 0.324652467358, 0.334958042925, 0.345590752577, 0.356560980664, 0.367879441171, 0.379557188183, 0.391605626677, 0.404036523663, 0.416862019679, 0.43009464064, 0.443747310081, 0.457833361772, 0.472366552741, 0.487361076714, 0.502831577971, 0.518793165654, 0.535261428519, 0.552252450163, 0.569782824731, 0.587869673122, 0.606530659713, 0.625784009605, 0.645648526428, 0.666143610703, 0.687289278791, 0.709106182437, 0.731615628947, 0.754839601989, 0.778800783071, 0.803522573689, 0.82902911818, 0.855345327307, 0.882496902585, 0.91051036138, 0.939413062813, 0.969233234476}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb new file mode 100644 index 0000000000..e50bd9f2a0 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_latency.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t exp_table_latency[1024] = {1.0, 1.064494458918, 1.133148453067, 1.206230249421, 1.284025416688, 1.366837941174, 
1.454991414618, 1.548830298634, 1.6487212707, 1.75505465696, 1.868245957432, 1.988737469582, 2.117000016613, 2.253534787213, 2.398875293967, 2.553589458063, 2.718281828459, 2.893595944172, 3.080216848918, 3.278873767939, 3.490342957462, 3.715450737941, 3.955076722921, 4.210157256144, 4.481689070338, 4.770733181968, 5.07841903718, 5.405948925141, 5.754602676006, 6.125742661882, 6.52081912033, 6.941375821197, 7.389056098931, 7.865609273945, 8.372897488127, 8.912902981199, 9.487735836359, 10.09964222548, 10.751013186076, 11.444393964331, 12.182493960703, 12.968197316969, 13.804574186067, 14.694892728789, 15.642631884188, 16.65149496361, 17.725424121462, 18.868615759265, 20.085536923188, 21.380942759123, 22.759895093527, 24.227782212611, 25.790339917193, 27.45367393546, 29.224283781235, 31.109088150968, 33.115451958692, 35.251215114589, 37.524723159601, 39.944859875822, 42.521082000063, 45.263456176259, 48.182698291099, 51.290215346586, 54.598150033144, 58.119428177448, 61.867809250368, 65.857940132404, 70.105412346688, 74.626822983201, 79.439839552261, 84.563269020706, 90.017131300522, 95.822737477087, 102.0027730827, 108.58138674079, 115.584284527188, 123.038830417177, 130.974153210819, 139.421260354375, 148.413159102577, 157.984985495187, 168.174141651845, 179.020441921657, 190.56626845863, 202.856736830865, 215.939872310614, 229.866797534079, 244.69193226422, 260.473206037167, 277.272284523134, 295.154810486372, 314.190660285694, 334.454216917865, 356.024660670779, 378.986278522156, 403.428793492735, 429.447715240934, 457.144713268909, 486.628014198347, 518.012824668342, 551.421781507839, 586.985430941709, 624.842738702961, 665.141633044362, 708.039582771299, 753.704212554561, 802.313957927379, 854.058762526152, 909.140820299332, 967.775365584677, 1030.191514142094, 1096.633158428459, 1167.359920612685, 1242.648167054996, 1322.792088214477, 1408.104848204696, 1498.919808489272, 1595.591830499049, 1698.498662260842, 1808.042414456063, 1924.651131676947, 
2048.780465020098, 2180.915452553049, 2321.572414611057, 2471.300971330025, 2630.686190299136, 2800.350872725163, 2980.957987041728, 3173.213259472856, 3377.867931673535, 3595.721696222809, 3827.625821439906, 4074.4864777337, 4337.26827848327, 4616.998049285644, 4914.768840299134, 5231.744197360583, 5569.162708566004, 5928.342844080489, 6310.688108089024, 6717.692523019596, 7150.946467468294, 7612.142890638241, 8103.083927575384, 8625.687941050362, 9181.99701760271, 9774.184947038395, 10404.565716560723, 11075.602552725617, 11789.917546552919, 12550.301899404027, 13359.726829661873, 14221.355182831325, 15138.553790426726, 16114.906625939208, 17154.228809290984, 18260.581514499365, 19438.287838802466, 20691.949695255636, 22026.465794806718, 23447.050788115514, 24959.255641914595, 26568.989329532407, 28282.541920334977, 30106.60915830866, 32048.31862582525, 34115.257594825, 36315.502674246636, 38657.65136955225, 41150.85567766677, 43804.85784860481, 46630.028453524326, 49637.40690795877, 52838.744608573186, 56246.55085200208, 59874.14171519782, 63735.692088290736, 67846.29106328034, 72222.00089499021, 76879.91976467776, 81838.2485915491, 87116.36215324633, 92734.88479321224, 98715.7710107605, 105082.39124875881, 111859.62321414231, 119073.94908809406, 126753.55900574342, 134928.46120973182, 143630.59930807285, 152893.97709449488, 162754.79141900392, 173251.57362786165, 184425.34012565826, 196319.7526478048, 208981.28886971297, 222459.42401932197, 236806.8242026268, 252079.55219763189, 268337.2865208745, 285643.5546225249, 304065.98112127866, 323676.55204902356, 344551.8961378237, 366773.5842483551, 390428.44810981676, 415608.9196167989, 442413.3920089205, 470946.6043445507, 501320.0507709557, 533652.4161901023, 568070.0400224912, 604707.4098811887, 643707.6870850961, 685223.2660649162, 729416.3698477013, 776459.683946858, 826537.0311345428, 879844.0897331391, 936589.1582325544, 996993.9692210965, 1061294.55581038, 1129742.1739398406, 1202604.2841647768, 
1280165.5967642837, 1362729.184252855, 1450617.6656428187, 1544174.4670851405, 1643765.1638145708, 1749778.908642818, 1862629.9525816191, 1982759.2635375687, 2110636.2494037976, 2246760.5922815157, 2391664.2009986816, 2545913.289555306, 2710110.589616963, 2884897.7057018704, 3070957.6222644863, 3269017.3724721107, 3479850.879102782, 3704281.9786653533, 3943187.640558553, 4197501.3938479675, 4468216.975051153, 4756392.211184672, 5063153.1532461485, 5389698.476283012, 5737304.163241297, 6107328.490896726, 6501217.337350737, 6920509.831830581, 7366844.36887022, 7841965.010372585, 8347728.30056935, 8886110.520507872, 9459215.410412326, 10069282.390094347, 10718695.309534611, 11409991.763828445, 12145873.008893793, 12929214.51668743, 13763077.211174121, 14650719.428953517, 15595609.651281245, 16601440.057234775, 17672140.95098341, 18811896.11953723, 20025159.180985764, 21316670.987107445, 22691478.148350973, 24154952.7535753, 25712813.361603595, 27371147.346616127, 29136434.684697136, 31015573.27448223, 33015905.890847184, 35145248.87696034, 37411922.68681341, 39824784.39757623, 42393262.318818316, 45127392.83383338, 48037859.61702515, 51136035.38059728, 54434026.313673414, 57944719.3874943, 61681832.71153795, 65659969.13733051, 69894673.31940599, 74402492.45638163, 79201040.94949608, 84309069.23126505, 89746537.03320384, 95534691.37891196, 101696149.60727958, 108254987.75023076, 115236834.61034139, 122668971.90594222, 130580440.87502606, 139002155.7545164, 147967024.57831997, 157510077.7661843, 167668605.00582433, 178482300.96318725, 189993420.39022255, 202246943.23624337, 215290750.4080559, 229175810.8656434, 243956380.78448477, 259690215.5627394, 276438795.50172055, 294267566.0415088, 313246193.49043137, 333448837.24767584, 354954439.5827541, 377847034.1041358, 402216074.12240005, 428156782.1909899, 455770522.19040954, 485165195.4097903, 516455662.17352223, 549764190.6604683, 585220934.6695302, 622964442.1984454, 663142196.8231003, 705911193.9928068, 
751438554.493433, 799902177.4755054, 851491435.5990057, 906407915.0111549, 964866203.0486645, 1027094726.7424176, 1093336645.4010565, 1163850800.761265, 1238912728.4174805, 1318815734.4832146, 1403872041.691069, 1494414009.409845, 1590795432.3460019, 1693392923.0041597, 1802607383.3086452, 1918865571.1364748, 2042621767.883031, 2174359553.5764885, 2314593696.4772825, 2463872164.546273, 2622778266.64146, 2791932931.8100224, 2971997135.5820627, 3163674482.7468557, 3367713956.703853, 3584912846.131592, 3816119860.410532, 4062238445.9734063, 4324230316.541787, 4603119211.043354, 4899994893.893999, 5216017413.275967, 5552421634.051333, 5910522063.023291, 6291717985.400048, 6697498932.532188, 7129450502.288794, 7589260554.81557, 8078725807.885054, 8599758857.610348, 9154395651.955996, 9744803446.248903, 10373289271.775618, 11042308950.557222, 11754476691.527246, 12512575305.609886, 13319567079.614166, 14178605351.434013, 15093046831.784616, 16066464720.622478, 17102662669.501904, 18205689644.4261, 19379855747.26984, 20629749057.596176, 21960253560.677063, 23376568231.771927, 24884227351.236477, 26489122129.84347, 28197523728.816822, 30016107764.530365, 31951980393.623913, 34012706080.464737, 36206337155.4565, 38541445279.69526, 41027154938.921486, 43673179097.646416, 46489857152.77189, 49488195335.00849, 52679909715.9612, 56077471988.9338, 59694158202.3415, 63544100636.158615, 67642343024.10967, 72004899337.38588, 76648816359.58551, 81592240297.39134, 86854487687.26756, 92456120875.24577, 98419028364.73895, 104766510346.34425, 111523369743.84406, 118716009132.16965, 126372533906.03659, 134522862102.38527, 143198841305.76044, 152434373093.43985, 162265545506.58438, 172730774065.04282, 183870951876.83078, 195729609428.83878, 208353084683.15567, 221790704143.66275, 236094975600.4193, 251321793304.9936, 267530656378.46527, 284784901305.5342, 303151949423.2106, 322703570371.15485, 343516162533.1038, 365671051565.2158, 389254808177.8391, 414359586412.444, 441083483735.5426, 
469530924356.67084, 499813067268.2567, 532048240601.79865, 566362403997.6107, 602889640794.8547, 641772681965.1016, 683163463836.7042, 727223721789.3031, 774125622238.336, 824052435379.0488, 877199251318.7649, 933773742395.7201, 993996974663.2367, 1058104271710.1313, 1126346134192.752, 1198989218671.7363, 1276317379578.317, 1358632778381.6807, 1446257064291.475, 1539532631109.0857, 1638823955138.8547, 1744519019387.1614, 1857030829614.4512, 1976799028164.2188, 2104291611875.0205, 2240006760788.29, 2384474784797.6777, 2538260195846.4834, 2701963913770.3423, 2876225614404.5425, 3061726229131.251, 3259190605633.689, 3469390340254.2046, 3693146793023.7476, 3931334297144.042, 4184883575463.5703, 4454785377297.33, 4742094349801.313, 5047933159029.1875, 5373496876774.296, 5720057650338.662, 6088969673476.217, 6481674477934.32, 6899706566270.394, 7344699407954.004, 7818391822184.32, 8322634772363.915, 8859398598778.488, 9430780717744.348, 10039013817308.252, 10686474581524.463, 11375692977399.34, 12109362140792.404, 12890348899903.223, 13721704977464.906, 14606678915417.002, 15548728768653.729, 16551535617448.605, 17619017951355.633, 18755346980792.363, 19964962936135.277, 21252592418016.44, 22623266866618.21, 24082342222135.09, 25635519853226.414, 27288868835238.293, 29048849665247.426, 30922339506593.8, 32916659061545.914, 35039601177103.973, 37299461295718.88, 39705069869913.914, 42265826867469.81, 44991738502003.2, 47893456332463.73, 50982318884332.1, 54270395955154.87, 57770535777540.57, 61496415223907.89, 65462593249161.84, 69684567780126.45, 74178836274030.61, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 1e-12, 
1e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 2e-12, 3e-12, 3e-12, 3e-12, 3e-12, 3e-12, 4e-12, 4e-12, 4e-12, 4e-12, 5e-12, 5e-12, 5e-12, 5e-12, 6e-12, 6e-12, 7e-12, 7e-12, 7e-12, 8e-12, 8e-12, 9e-12, 1e-11, 1e-11, 1.1e-11, 1.2e-11, 1.2e-11, 1.3e-11, 1.4e-11, 1.5e-11, 1.6e-11, 1.7e-11, 1.8e-11, 1.9e-11, 2e-11, 2.2e-11, 2.3e-11, 2.4e-11, 2.6e-11, 2.8e-11, 2.9e-11, 3.1e-11, 3.3e-11, 3.5e-11, 3.8e-11, 4e-11, 4.3e-11, 4.6e-11, 4.8e-11, 5.2e-11, 5.5e-11, 5.8e-11, 6.2e-11, 6.6e-11, 7.1e-11, 7.5e-11, 8e-11, 8.5e-11, 9.1e-11, 9.6e-11, 1.03e-10, 1.09e-10, 1.16e-10, 1.24e-10, 1.32e-10, 1.4e-10, 1.49e-10, 1.59e-10, 1.69e-10, 1.8e-10, 1.92e-10, 2.04e-10, 2.17e-10, 2.31e-10, 2.46e-10, 2.62e-10, 2.79e-10, 2.97e-10, 3.16e-10, 3.36e-10, 3.58e-10, 3.81e-10, 4.06e-10, 4.32e-10, 4.6e-10, 4.9e-10, 5.21e-10, 5.55e-10, 5.91e-10, 6.29e-10, 6.69e-10, 7.12e-10, 7.58e-10, 8.07e-10, 8.59e-10, 9.15e-10, 9.74e-10, 1.036e-09, 1.103e-09, 1.174e-09, 1.25e-09, 1.331e-09, 1.417e-09, 1.508e-09, 1.605e-09, 1.709e-09, 1.819e-09, 1.936e-09, 2.061e-09, 2.194e-09, 2.336e-09, 2.486e-09, 2.647e-09, 2.817e-09, 2.999e-09, 3.192e-09, 3.398e-09, 3.617e-09, 3.851e-09, 4.099e-09, 4.363e-09, 4.645e-09, 4.944e-09, 5.263e-09, 5.603e-09, 5.964e-09, 6.349e-09, 6.758e-09, 7.194e-09, 7.658e-09, 8.152e-09, 8.678e-09, 9.237e-09, 9.833e-09, 1.0467e-08, 1.1142e-08, 1.1861e-08, 1.2626e-08, 1.344e-08, 1.4307e-08, 1.523e-08, 1.6212e-08, 1.7258e-08, 1.8371e-08, 1.9556e-08, 2.0817e-08, 2.2159e-08, 2.3589e-08, 2.511e-08, 2.6729e-08, 2.8453e-08, 3.0288e-08, 3.2242e-08, 3.4321e-08, 3.6535e-08, 3.8891e-08, 4.1399e-08, 4.4069e-08, 4.6912e-08, 4.9937e-08, 5.3158e-08, 5.6586e-08, 6.0236e-08, 6.4121e-08, 6.8256e-08, 7.2658e-08, 7.7344e-08, 8.2332e-08, 8.7642e-08, 9.3295e-08, 9.9312e-08, 1.05717e-07, 1.12535e-07, 1.19793e-07, 1.27519e-07, 1.35743e-07, 1.44498e-07, 1.53817e-07, 1.63738e-07, 1.74298e-07, 1.85539e-07, 1.97505e-07, 2.10243e-07, 2.23803e-07, 2.38237e-07, 2.53602e-07, 2.69958e-07, 2.87369e-07, 3.05902e-07, 
3.25631e-07, 3.46633e-07, 3.68989e-07, 3.92786e-07, 4.18119e-07, 4.45085e-07, 4.73791e-07, 5.04348e-07, 5.36875e-07, 5.71501e-07, 6.08359e-07, 6.47595e-07, 6.89362e-07, 7.33822e-07, 7.81149e-07, 8.31529e-07, 8.85158e-07, 9.42245e-07, 1.003015e-06, 1.067704e-06, 1.136565e-06, 1.209867e-06, 1.287897e-06, 1.370959e-06, 1.459378e-06, 1.5535e-06, 1.653692e-06, 1.760346e-06, 1.873879e-06, 1.994734e-06, 2.123383e-06, 2.260329e-06, 2.406108e-06, 2.561289e-06, 2.726478e-06, 2.90232e-06, 3.089504e-06, 3.28876e-06, 3.500867e-06, 3.726653e-06, 3.967002e-06, 4.222851e-06, 4.495202e-06, 4.785117e-06, 5.093731e-06, 5.422248e-06, 5.771953e-06, 6.144212e-06, 6.54048e-06, 6.962305e-06, 7.411335e-06, 7.889325e-06, 8.398143e-06, 8.939776e-06, 9.516342e-06, 1.0130094e-05, 1.0783429e-05, 1.14789e-05, 1.2219225e-05, 1.3007298e-05, 1.3846196e-05, 1.4739199e-05, 1.5689796e-05, 1.6701701e-05, 1.7778868e-05, 1.8925506e-05, 2.0146097e-05, 2.1445408e-05, 2.2828518e-05, 2.4300831e-05, 2.58681e-05, 2.7536449e-05, 2.9312398e-05, 3.1202885e-05, 3.3215298e-05, 3.5357501e-05, 3.7637864e-05, 4.0065297e-05, 4.2649287e-05, 4.539993e-05, 4.8327974e-05, 5.144486e-05, 5.4762769e-05, 5.8294664e-05, 6.2054347e-05, 6.6056508e-05, 7.0316787e-05, 7.485183e-05, 7.9679358e-05, 8.4818235e-05, 9.0288541e-05, 9.6111652e-05, 0.000102310321, 0.00010890877, 0.000115932782, 0.000123409804, 0.000131369053, 0.000139841629, 0.000148860639, 0.000158461325, 0.000168681203, 0.000179560205, 0.000191140844, 0.000203468369, 0.000216590951, 0.000230559868, 0.000245429702, 0.000261258557, 0.000278108287, 0.00029604473, 0.000315137975, 0.000335462628, 0.000357098109, 0.000380128958, 0.000404645169, 0.000430742541, 0.000458523048, 0.000488095244, 0.000519574682, 0.00055308437, 0.000588755247, 0.000626726698, 0.000667147098, 0.000710174389, 0.000755976702, 0.00080473301, 0.00085663383, 0.000911881966, 0.0009706933, 0.001033297639, 0.001099939611, 0.001170879621, 0.001246394868, 0.001326780431, 0.001412350417, 0.001503439193, 
0.00160040269, 0.001703619796, 0.001813493833, 0.001930454136, 0.002054957731, 0.002187491118, 0.002328572174, 0.002478752177, 0.002638617957, 0.002808794195, 0.002989945856, 0.003182780797, 0.003388052522, 0.003606563136, 0.003839166474, 0.004086771438, 0.004350345551, 0.004630918734, 0.004929587332, 0.005247518399, 0.005585954259, 0.005946217356, 0.006329715427, 0.006737946999, 0.007172507245, 0.007635094219, 0.008127515489, 0.008651695203, 0.009209681604, 0.009803655036, 0.010435936463, 0.011108996538, 0.011825465259, 0.012588142242, 0.013400007665, 0.014264233909, 0.015184197957, 0.016163494588, 0.017205950426, 0.018315638889, 0.019496896109, 0.020754337874, 0.022092877665, 0.023517745856, 0.02503451015, 0.026649097336, 0.02836781645, 0.030197383422, 0.032144947327, 0.034218118312, 0.036424997337, 0.038774207832, 0.041274929386, 0.043936933623, 0.046770622384, 0.049787068368, 0.052998058403, 0.056416139504, 0.060054667895, 0.063927861207, 0.068050854025, 0.072439757034, 0.077111719968, 0.082084998624, 0.087379026195, 0.093014489211, 0.099013408364, 0.105399224562, 0.11219689052, 0.119432968267, 0.127135732932, 0.135335283237, 0.144063659101, 0.153354966845, 0.163245512454, 0.17377394345, 0.184981399907, 0.196911675204, 0.209611387151, 0.223130160148, 0.237520819095, 0.252839595805, 0.269146348729, 0.28650479686, 0.304982768711, 0.324652467358, 0.345590752577, 0.367879441171, 0.391605626677, 0.416862019679, 0.443747310081, 0.472366552741, 0.502831577971, 0.535261428519, 0.569782824731, 0.606530659713, 0.645648526428, 0.687289278791, 0.731615628947, 0.778800783071, 0.82902911818, 0.882496902585, 0.939413062813}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb new file mode 100644 index 0000000000..ba1f736594 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/exp_table_legacy.tb @@ 
-0,0 +1 @@ +static const typename CONFIG_T::table_t exp_table_legacy[1024] = {0.0003354626279025118, 0.0003407453956084444, 0.00034611135480074116, 0.00035156181555468625, 0.0003570981085762476, 0.000362721585526963, 0.0003684336193539422, 0.000374235604625066, 0.00038012895786946363, 0.0003861151179233525, 0.00039219554628132326, 0.00039837172745315756, 0.0004046451693262645, 0.0004110174035338246, 0.0004174899858287315, 0.0004240644964634219, 0.00043074254057568753, 0.00043752574858056214, 0.0004444157765683803, 0.00045141430670910486, 0.0004585230476630206, 0.00046574373499789687, 0.0004730781316127184, 0.00048052802816808926, 0.00048809524352341505, 0.0004957816251809691, 0.0005035890497369525, 0.000511519423339656, 0.0005195746821548384, 0.0005277567928384327, 0.0005360677530166963, 0.0005445095917739227, 0.0005530843701478336, 0.0005617941816327723, 0.0005706411526908208, 0.0005796274432709663, 0.0005887552473364432, 0.0005980267934003796, 0.0006074443450698789, 0.0006170102015986695, 0.0006267266984484576, 0.0006365962078591196, 0.0006466211394278742, 0.000656803940697575, 0.0006671470977542672, 0.0006776531358341546, 0.0006883246199401247, 0.000699164155467983, 0.0007101743888425491, 0.0007213580081637692, 0.0007327177438630043, 0.0007442563693696527, 0.0007559767017882707, 0.0007678816025863562, 0.0007799739782929635, 0.000792256781208319, 0.0008047330101246132, 0.0008174057110581422, 0.0008302779779929783, 0.0008433529536363545, 0.0008566338301859405, 0.0008701238501092051, 0.00088382630693505, 0.0008977445460579097, 0.0009118819655545162, 0.0009262420170135229, 0.000940828206378196, 0.000955644094802374, 0.000970693299519909, 0.0009859794947277968, 0.0010015064124832178, 0.0010172778436147007, 0.001033297638647637, 0.0010495697087443705, 0.0010660980266590896, 0.0010828866277077577, 0.0010999396107533182, 0.001117261139206414, 0.001134855442041865, 0.0011527268148311548, 0.0011708796207911744, 0.001189318291849483, 0.0012080473297263435, 
0.0012270713070337978, 0.0012463948683920495, 0.0012660227315634282, 0.001285959688604209, 0.0013062106070345731, 0.0013267804310269915, 0.001347674182613322, 0.0013688969629109162, 0.0013904539533680367, 0.0014123504170288818, 0.0014345916998185367, 0.0014571832318481571, 0.0014801305287407067, 0.0015034391929775726, 0.0015271149152663852, 0.0015511634759303816, 0.0015755907463196436, 0.0016004026902445643, 0.0016256053654318836, 0.0016512049250036575, 0.0016772076189795154, 0.001703619795802574, 0.001730447903889382, 0.0017576984932042732, 0.0017853782168585108, 0.0018134938327346152, 0.0018420522051362717, 0.0018710603064642198, 0.0019005252189185332, 0.0019304541362277093, 0.0019608543654049855, 0.0019917333285323136, 0.00202309856457243, 0.002054957731209459, 0.0020873186067185026, 0.0021201890918646723, 0.002153577211832027, 0.002187491118182885, 0.002221939090847993, 0.002256929540148032, 0.002292471008846966, 0.0023285721742377133, 0.0023652418502606745, 0.0024024889896556126, 0.0024403226861474305, 0.0024787521766663585, 0.0025177868436031133, 0.0025574362170995664, 0.0025977099773754862, 0.0026386179570919216, 0.002680170143751801, 0.00272237668213834, 0.0027652478767918433, 0.0028087941945255128, 0.002853026266980875, 0.00289795489322345, 0.002943591042379293, 0.0029899458563130603, 0.003037030652348247, 0.0030848569260302594, 0.003133436353933003, 0.003182780796509667, 0.003232902300988401, 0.003283813104313592, 0.003335525636133462, 0.0033880525218347116, 0.0034414065856249506, 0.0034956008536636734, 0.003550648557242539, 0.0036065631360157305, 0.0036633582412811897, 0.0037210477393135196, 0.003779645714749375, 0.0038391664740261636, 0.0038996245488749017, 0.003961034699868069, 0.0040234119200233415, 0.004086771438464067, 0.004151128724137389, 0.004216499489590925, 0.004282899694808913, 0.004350345551108769, 0.004418853525099015, 0.004488440342699523, 0.004559122993225079, 0.004630918733533246, 0.004703845092237552, 0.00477791987398702, 
0.004853161163813102, 0.004929587331545052, 0.005007217036294841, 0.005086069231012701, 0.0051661631671143975, 0.005247518399181385, 0.005330154789734964, 0.0054140925140856375, 0.005499352065258816, 0.0055859542589981, 0.005673920238847358, 0.005763271481312825, 0.0058540298011064956, 0.005946217356472094, 0.006039856654594909, 0.006134970557096825, 0.006231582285617883, 0.006329715427485746, 0.006429393941474441, 0.006530642163653784, 0.006633484813330925, 0.006737946999085467, 0.0068440542248995985, 0.006951832396384793, 0.007061307827106542, 0.007172507245008699, 0.007285457798938988, 0.0074001870652772766, 0.007516723054668224, 0.0076350942188599625, 0.0077553294576504575, 0.007877458125943277, 0.00800151004091445, 0.008127515489292211, 0.008255505234751359, 0.008385510525424083, 0.008517563101529046, 0.008651695203120634, 0.008787939577960206, 0.008926329489511324, 0.009066898725060887, 0.009209681603968138, 0.00935471298604359, 0.00950202828005989, 0.00965166345239672, 0.009803655035821828, 0.009958040138410345, 0.010114856452604551, 0.010274142264416326, 0.010435936462774504, 0.010600278549019446, 0.0107672086465471, 0.010936767510604966, 0.011108996538242308, 0.011283937778417049, 0.011461633942261854, 0.011642128413511858, 0.011825465259096618, 0.012011689239898848, 0.012200845821682602, 0.012392981186193542, 0.012588142242433998, 0.012786376638115597, 0.012987732771292249, 0.013192259802176308, 0.013400007665140828, 0.013611027080910821, 0.013825369568946493, 0.014043087460021496, 0.014264233908999256, 0.014488862907810497, 0.014717029298635137, 0.014948788787291759, 0.015184197956837946, 0.015423314281384779, 0.015666196140128895, 0.015912902831605506, 0.016163494588165874, 0.016418032590682783, 0.016676578983487563, 0.016939196889542374, 0.017205950425851383, 0.01747690471911465, 0.017752125921628545, 0.018031681227436498, 0.01831563888873418, 0.018604068232532935, 0.01889703967758568, 0.019194624751579337, 0.019496896108597995, 0.019803927546861083, 
0.02011579402674089, 0.0204325716890638, 0.020754337873699742, 0.02108117113844438, 0.021413151278198653, 0.021750359344450344, 0.022092877665062443, 0.02244078986437315, 0.022794180883612347, 0.023153137001639675, 0.02351774585600911, 0.023888096464365276, 0.02426427924617674, 0.024646386044811498, 0.025034510149960144, 0.025428746320412143, 0.025829190807190754, 0.026235941377052298, 0.026649097336355485, 0.027068759555306635, 0.02749503049258667, 0.027928014220365963, 0.0283678164497131, 0.028814544556403775, 0.029268307607136092, 0.02972921638615875, 0.0301973834223185, 0.03067292301653359, 0.03115595126969981, 0.03164658611103602, 0.03214494732687607, 0.03265115658991408, 0.03316533748891028, 0.03368761555886473, 0.03421811831166604, 0.034756975267222885, 0.03530431798508576, 0.035860280096566576, 0.036424997337364234, 0.03699860758070385, 0.037581250870997916, 0.03817306945803752, 0.03877420783172201, 0.039384812757335554, 0.04000503331137926, 0.040635020917967515, 0.04127492938579755, 0.04192491494570112, 0.04258513628878761, 0.04325575460518773, 0.04393693362340741, 0.04462883965030139, 0.04533164161167627, 0.046045511093533026, 0.04677062238395899, 0.047507152515679496, 0.04825528130927964, 0.049015191417106724, 0.04978706836786394, 0.050571100611906464, 0.05136747956725073, 0.05217639966630833, 0.0529980584033558, 0.053832656382752, 0.05468039736791477, 0.05554148833106887, 0.05641613950377735, 0.057304564428268645, 0.058206980009571974, 0.059123606568473756, 0.060054667895307945, 0.06100039130459345, 0.061961007690531984, 0.06293675158337987, 0.06392786120670757, 0.0649345785355609, 0.06595714935553816, 0.06699582332279769, 0.0680508540250102, 0.06912249904327114, 0.07021102001498805, 0.07131668269775802, 0.07243975703425146, 0.07358051721811716, 0.0747392417609257, 0.0759162135601666, 0.07711171996831671, 0.07832605286299588, 0.07955950871822769, 0.08081238867682222, 0.0820849986238988, 0.08337764926156645, 0.08469065618478049, 0.0860243399583936, 
0.08737902619542037, 0.08875504563653425, 0.09015273423081639, 0.09157243321777621, 0.09301448921066349, 0.09447925428109244, 0.09596708604499847, 0.0974783477499485, 0.0990134083638263, 0.10057264266491435, 0.1021564313333943, 0.10376516104428836, 0.10539922456186433, 0.10705902083552722, 0.10874495509722103, 0.1104574389603643, 0.11219689052034373, 0.1139637344565904, 0.11575840213626322, 0.11758133171956547, 0.11943296826671962, 0.12131376384662682, 0.1232241776472375, 0.1251646760876602, 0.1271357329320356, 0.129137829405204, 0.13117145431019425, 0.13323710414756296, 0.13533528323661267, 0.13746650383851944, 0.13963128628139898, 0.14183015908734253, 0.14406365910145327, 0.1463323316229146, 0.1486367305381225, 0.1509774184559146, 0.15335496684492847, 0.15576995617312325, 0.1582229760494984, 0.16071462536804407, 0.1632455124539584, 0.1658162552121675, 0.16842748127818424, 0.17107982817134254, 0.1737739434504451, 0.17651048487186194, 0.17929012055011864, 0.18211352912101367, 0.1849813999073043, 0.1878944330870017, 0.19085333986431632, 0.19385884264329525, 0.19691167520419406, 0.2000125828826258, 0.20316232275153173, 0.20636166380601706, 0.20961138715109784, 0.21291228619240427, 0.21626516682988728, 0.21967084765457534, 0.22313016014842982, 0.22664394888734782, 0.23021307174736147, 0.23383840011408472, 0.23752081909545814, 0.24126122773784434, 0.24506053924552593, 0.24891968120366054, 0.25283959580474646, 0.25682124007865503, 0.26086558612628497, 0.2649736213568966, 0.26914634872918386, 0.2733847869961416, 0.27768997095379, 0.28206295169381546, 0.2865047968601901, 0.29101659090983195, 0.2955994353773707, 0.3002544491440824, 0.3049827687110593, 0.30978554847668166, 0.314663961018459, 0.31961919737930994, 0.32465246735834974, 0.3297649998062575, 0.33495804292529496, 0.34023286457404905, 0.3455907525769745, 0.3510330150388101, 0.35656098066394704, 0.36217599908082576, 0.3678794411714424, 0.37367269940604303, 0.3795571881830896, 0.3855343441745787, 0.391605626676799, 
0.39777251796661167, 0.4040365236633421, 0.41039917309637, 0.4168620196785084, 0.4234266412852628, 0.43009464064006225, 0.43686764570555725, 0.44374731008107987, 0.4507353134063624, 0.45783336177161427, 0.4650431881340563, 0.4723665527410147, 0.4798052435596776, 0.48736107671361917, 0.4950358969261986, 0.5028315779709409, 0.5107500231290107, 0.5187931656538893, 0.5269629692433709, 0.5352614285189902, 0.5436905695130004, 0.5522524501630204, 0.5609491608144709, 0.569782824730923, 0.5787555986124842, 0.5878696731223465, 0.5971272734216274, 0.6065306597126334, 0.6160821277906782, 0.6257840096045911, 0.635638673826052, 0.645648526427892, 0.6558160112715016, 0.6661436107034878, 0.676633846161729, 0.6872892787909721, 0.6981125100681258, 0.7091061824373984, 0.7202729799554398, 0.7316156289466419, 0.7431368986687583, 0.7548396019890073, 0.7667265960708202, 0.7788007830714049, 0.791065110850296, 0.8035225736890608, 0.8161762130223398, 0.8290291181804004, 0.8420844271433823, 0.8553453273074225, 0.8688150562628432, 0.8824969025845953, 0.8963942066351505, 0.9105103613800342, 0.9248488132162048, 0.9394130628134758, 0.9542066659691884, 0.9692332344763441, 0.9844964370054085, 1.0, 1.0157477085866857, 1.0317434074991028, 1.0479910020166325, 1.0644944589178593, 1.0812578074490395, 1.098285140307826, 1.115580614642481, 1.1331484530668263, 1.1509929446911764, 1.1691184461695043, 1.1875293827631006, 1.2062302494209807, 1.2252256118773075, 1.244520107766095, 1.2641184477534664, 1.2840254166877414, 1.3042458747676378, 1.3247847587288655, 1.3456470830494105, 1.3668379411737963, 1.3883625067566268, 1.4102260349257107, 1.4324338635650782, 1.4549914146182015, 1.4779041954117385, 1.5011778000001228, 1.5248179105313266, 1.5488302986341331, 1.573220826827253, 1.5979954499506333, 1.6231602166193055, 1.6487212707001282, 1.6746848528117841, 1.7010573018484005, 1.7278450565271632, 1.7550546569602985, 1.782692746251815, 1.8107660721193872, 1.8392814885417808, 1.8682459574322223, 1.8976665503381187, 
1.9275504501675447, 1.9579049529429182, 1.988737469582292, 2.0200555277086965, 2.0518667734879767, 2.0841789734955687, 2.1170000166126743, 2.1503379159522997, 2.184200810815618, 2.218596968679145, 2.2535347872132085, 2.2890227963322007, 2.325069660277121, 2.3616841797309096, 2.3988752939670976, 2.4366520830322917, 2.475023769963025, 2.5139997230375233, 2.553589458062927, 2.5938026406985344, 2.634649088815631, 2.676138774894477, 2.718281828459045, 2.7610885385501014, 2.8045693562372263, 2.8487348971703996, 2.893595944171761, 2.939163449868194, 2.985448539365356, 3.032462512963828, 3.0802168489180315, 3.128723206238592, 3.177993427538838, 3.2280395419261225, 3.2788737679386735, 3.3305085165287003, 3.382956394092469, 3.4362302055481027, 3.4903429574618414, 3.545307861223541, 3.6011383362721756, 3.6578480133721323, 3.715450737941104, 3.773960573430387, 3.8333918047584103, 3.8937589417983354, 3.955076722920577, 4.017360118591115, 4.080624335026461, 4.1448848179061955, 4.21015725614396, 4.276457585717836, 4.343801993561042, 4.412206921513906, 4.481689070338065, 4.552265403793884, 4.6239531527820805, 4.696769819550579, 4.770733181967603, 4.8458612978620605, 4.922172509432291, 4.999685447724226, 5.0784190371800815, 5.15839250025867, 5.239625362128489, 5.322137455434697, 5.405948925141168, 5.491080233448797, 5.577552164791259, 5.665385830909431, 5.754602676005731, 5.845224481979631, 5.937273373745607, 6.030771824634842, 6.125742661881986, 6.222209072198332, 6.320194607432744, 6.419723190321737, 6.5208191203301125, 6.623507079583559, 6.727812138894691, 6.833759763883972, 6.941375821197035, 7.050686584819912, 7.161718742493711, 7.274499402230307, 7.38905609893065, 7.505416801107283, 7.623609917712736, 7.743664305075444, 7.865609273944893, 7.989474596647709, 8.115290514356445, 8.243087744472826, 8.372897488127265, 8.504751437796486, 8.6386817850411, 8.77472122836504, 8.912902981198737, 9.053260780008058, 9.195828892530894, 9.340642126143496, 9.487735836358526, 
9.637145935456955, 9.788908901255894, 9.943061786014486, 10.099642225480054, 10.258688448076699, 10.42023928423861, 10.584334175890335, 10.751013186076355, 10.9203170087423, 11.092286978670202, 11.26696508157019, 11.444393964331121, 11.624616945432635, 11.807678025521156, 11.993621898152476, 12.182493960703473, 12.374340325455691, 12.56920783085344, 12.767144052939209, 12.968197316969134, 13.172416709211404, 13.379852088930456, 13.59055410055989, 13.804574186067095, 14.021964597512564, 14.242778409807016, 14.467069533669397, 14.694892728788941, 14.926303617194517, 15.16135869683449, 15.400115355370454, 15.642631884188173, 15.888967492629167, 16.139182322446413, 16.39333746248769, 16.651494963610144, 16.91371785382974, 17.180070153709273, 17.45061689198871, 17.725424121461643, 18.00455893510183, 18.28808948244362, 18.576084986220376, 18.868615759264884, 19.16575322167593, 19.46756991825522, 19.774139536218833, 20.085536923187668, 20.40183810546114, 20.723120306578686, 21.049461966173517, 21.380942759123343, 21.717643615002626, 22.059646737841184, 22.407035626193938, 22.75989509352673, 23.118311288923127, 23.482371718117378, 23.85216526485852, 24.22778221261098, 24.609314266596865, 24.9968545761854, 25.39049775763493, 25.790339917193062, 26.196478674560588, 26.609013186724894, 27.028044172168716, 27.4536739354601, 27.886006392229614, 28.325147094540903, 28.771203256660744, 29.22428378123494, 29.684499285876434, 30.151962130172098, 30.62678644311483, 31.109088150967665, 31.59898500556662, 32.09659661306934, 32.60204446315635, 33.11545195869231, 33.63694444585419, 34.16664924473404, 34.70469568042362, 35.25121511458855, 35.806340977539655, 36.37020880080946, 36.942956250241515, 37.524723159600995, 38.115651564714454, 38.71588573814723, 39.32557222442699, 39.94485987582193, 40.57389988868238, 41.2128458403547, 41.861853726676614, 42.52108200006278, 43.19069160819034, 43.87084603329353, 44.5617113320772, 45.26345617625877, 45.97625189374871, 46.70027251047953, 
47.435694792893365, 48.182698291098816, 48.94146538270724, 49.71218131735948, 50.49503426195374, 51.290215346585704, 52.097918711212095, 52.9183415530491, 53.75168417471723, 54.598150033144236, 55.45794578923834, 56.33128135834348, 57.21836996148928, 58.119428177447986, 59.034675995611245, 59.96433686969955, 60.908637772317434, 61.86780925036789, 62.842085481339346, 63.83170433047908, 64.83690740886694, 65.8579401324037, 66.89505178172818, 67.9484955630781, 69.01852867010916, 70.10541234668786, 71.20941195067294, 72.3307970187014, 73.46984133199462, 74.62682298320091, 75.80202444429054, 76.99573263552006, 78.2082389954826, 79.43983955226133, 80.69083499570341, 81.9615307508321, 83.25223705241488, 84.563269020706, 85.89494673838158, 87.2475953286865, 88.62154503481175, 90.01713130052181, 91.43469485205186, 92.87458178129451, 94.33714363029665, 95.82273747708687, 97.33172602285454, 98.86447768050158, 100.42136666458902, 102.00277308269968, 103.60908302823987, 105.24068867470233, 106.89798837141365, 108.5813867407896, 110.29129477712178, 112.02812994692015, 113.7923162908356, 115.58428452718766, 117.40447215712238, 119.2533235714264, 121.13129015902297, 123.03883041717654, 124.9764100634329, 126.94450214932198, 128.94358717585138, 130.9741532108186, 133.0366960079705, 135.13171912803952, 137.25973406168575, 139.42126035437516, 141.61682573322432, 143.8469662358426, 146.11222634120344, 148.4131591025766, 150.7503262825534, 153.12429849019884, 155.5356553203632, 157.98498549518746, 160.47288700783744, 162.99996726850102, 165.5668432526847, 168.17414165184545, 170.82249902639472, 173.5125619611118, 176.24498722300464, 179.02044192165667, 181.83960367209863, 184.70316076024528, 187.61181231093738, 190.56626845863, 193.56725052076862, 196.6154911738957, 199.71173463253027, 202.85673683086486, 206.05126560732333, 209.29610089202527, 212.59203489720244, 215.9398723106141, 219.34043049200778, 222.79453967267412, 226.3030431581442, 229.86679753407884, 233.4866728754002, 
237.16355295871682, 240.8983354780937, 244.69193226422038, 248.54526950703038, 252.45928798182638, 256.43494327896633, 260.4732060371668, 264.57506218047985, 268.7415131590023, 272.9735761933752, 277.272284523134, 281.63868765896893, 286.07385163895896, 290.5788592888401, 295.15481048637224, 299.8028224298701, 304.5240299109616, 309.3195855916426, 314.1906602856942, 319.1384432445317, 324.1641424475551, 329.2689848970721, 334.45421691786504, 339.72110446147576, 345.0709334152821, 350.50500991644157, 356.0246606707791, 361.6312332766962, 367.32609655418145, 373.1106408790015, 378.98627852215554, 384.954443994675, 391.01659439785277, 397.1742097789884, 403.4287934927351, 409.7818725681369, 416.23499808144635, 422.78974553481265, 429.4477152409339, 436.21053271376616, 443.0798490653855, 450.0573414090999, 457.1447132689089, 464.34369499541174, 471.65604418826433, 479.0835461252901, 486.6280141983472, 494.29129035606036, 502.0752455535245, 509.9817802090901, 518.012824668342, 526.170339675385, 534.4563168515505, 542.872779181642, 551.4217815078387, 560.1054110313753, 568.9257878221232, 577.8850653361966, 586.9854309417088, 596.229106452809, 605.6183486721279, 615.1554499417664, 624.8427387029609, 634.6825800645618, 644.6773763804644, 654.829567836133, 665.1416330443618, 675.6160896504167, 686.2554949467076, 697.0624464971402, 708.0395827712994, 719.1895837886203, 730.5151717727034, 742.0191118159327, 753.7042125545613, 765.5733268544279, 777.629352507471, 789.8752329392117, 802.313957927379, 814.9485643318498, 827.7821368360857, 840.8178087002444, 854.0587625261516, 867.5082310343188, 881.1694978531985, 895.045898320867, 909.1408202993323, 923.4577050016666, 938.0000478321625, 952.7713992397206, 967.7753655846766, 983.0156100192775, 998.4958533820242, 1014.2198751060984, 1030.1915141420939, 1046.4146698952802, 1062.893303177624, 1079.631437174805, 1096.6331584284585, 1113.9026178338868, 1131.4440316534813, 1149.261682546105, 1167.359920612685, 1185.7431644582703, 
1204.4159022708138, 1223.3826929169447, 1242.6481670549956, 1262.217028265557, 1282.0940541998355, 1302.284097746097, 1322.7920882144774, 1343.6230325404526, 1364.7820165072585, 1386.2742059875643, 1408.1048482046956, 1430.2792730137223, 1452.8028942027195, 1475.6812108145175, 1498.919808489272, 1522.524360828172, 1546.5006307786239, 1570.8544720412515, 1595.591830499049, 1620.7187456690444, 1646.2413521768196, 1672.1658812542516, 1698.4986622608421, 1725.2461242290015, 1752.4147974336688, 1780.0113149866502, 1808.0424144560632, 1836.514939511285, 1865.4358415938036, 1894.8121816143816, 1924.6511316769472, 1954.9599768296307, 1985.7461168433776, 2017.0170680185702, 2048.780465020098, 2081.0440627413286, 2113.8157381974315, 2147.1034924485148, 2180.9154525530494, 2215.2598735520546, 2250.1451404845307, 2285.5797704346282, 2321.572414611057, 2358.1318604592398, 2395.267033806731, 2432.9870010424147, 2471.3009713300253, 2510.2182988565237, 2549.7484851158824, 2589.901181228831, 2630.6861902991354, 2672.113469806985, 2714.1931340400633, 2756.935456562909, 2800.3508727251633, 2844.4499822093103, 2889.243551618546, 2934.742517105396}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table.tb new file mode 100644 index 0000000000..9debe420a1 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t invert_table[1024] = {1.7976931348623157e+308, 8.0, 4.0, 2.666666666667, 2.0, 1.6, 1.333333333333, 1.142857142857, 1.0, 0.888888888889, 0.8, 0.727272727273, 0.666666666667, 0.615384615385, 0.571428571429, 0.533333333333, 0.5, 0.470588235294, 0.444444444444, 0.421052631579, 0.4, 0.380952380952, 0.363636363636, 0.347826086957, 0.333333333333, 0.32, 0.307692307692, 0.296296296296, 0.285714285714, 0.275862068966, 0.266666666667, 0.258064516129, 0.25, 
0.242424242424, 0.235294117647, 0.228571428571, 0.222222222222, 0.216216216216, 0.210526315789, 0.205128205128, 0.2, 0.19512195122, 0.190476190476, 0.186046511628, 0.181818181818, 0.177777777778, 0.173913043478, 0.170212765957, 0.166666666667, 0.163265306122, 0.16, 0.156862745098, 0.153846153846, 0.150943396226, 0.148148148148, 0.145454545455, 0.142857142857, 0.140350877193, 0.137931034483, 0.135593220339, 0.133333333333, 0.131147540984, 0.129032258065, 0.126984126984, 0.125, 0.123076923077, 0.121212121212, 0.119402985075, 0.117647058824, 0.115942028986, 0.114285714286, 0.112676056338, 0.111111111111, 0.109589041096, 0.108108108108, 0.106666666667, 0.105263157895, 0.103896103896, 0.102564102564, 0.101265822785, 0.1, 0.098765432099, 0.09756097561, 0.096385542169, 0.095238095238, 0.094117647059, 0.093023255814, 0.091954022989, 0.090909090909, 0.089887640449, 0.088888888889, 0.087912087912, 0.086956521739, 0.086021505376, 0.085106382979, 0.084210526316, 0.083333333333, 0.082474226804, 0.081632653061, 0.080808080808, 0.08, 0.079207920792, 0.078431372549, 0.077669902913, 0.076923076923, 0.07619047619, 0.075471698113, 0.07476635514, 0.074074074074, 0.073394495413, 0.072727272727, 0.072072072072, 0.071428571429, 0.070796460177, 0.070175438596, 0.069565217391, 0.068965517241, 0.068376068376, 0.067796610169, 0.067226890756, 0.066666666667, 0.066115702479, 0.065573770492, 0.065040650407, 0.064516129032, 0.064, 0.063492063492, 0.062992125984, 0.0625, 0.062015503876, 0.061538461538, 0.06106870229, 0.060606060606, 0.06015037594, 0.059701492537, 0.059259259259, 0.058823529412, 0.058394160584, 0.057971014493, 0.057553956835, 0.057142857143, 0.056737588652, 0.056338028169, 0.055944055944, 0.055555555556, 0.055172413793, 0.054794520548, 0.054421768707, 0.054054054054, 0.053691275168, 0.053333333333, 0.05298013245, 0.052631578947, 0.052287581699, 0.051948051948, 0.051612903226, 0.051282051282, 0.050955414013, 0.050632911392, 0.050314465409, 0.05, 0.049689440994, 0.049382716049, 
0.049079754601, 0.048780487805, 0.048484848485, 0.048192771084, 0.047904191617, 0.047619047619, 0.047337278107, 0.047058823529, 0.046783625731, 0.046511627907, 0.046242774566, 0.045977011494, 0.045714285714, 0.045454545455, 0.045197740113, 0.044943820225, 0.04469273743, 0.044444444444, 0.044198895028, 0.043956043956, 0.043715846995, 0.04347826087, 0.043243243243, 0.043010752688, 0.042780748663, 0.042553191489, 0.042328042328, 0.042105263158, 0.041884816754, 0.041666666667, 0.041450777202, 0.041237113402, 0.041025641026, 0.040816326531, 0.040609137056, 0.040404040404, 0.040201005025, 0.04, 0.039800995025, 0.039603960396, 0.039408866995, 0.039215686275, 0.039024390244, 0.038834951456, 0.038647342995, 0.038461538462, 0.038277511962, 0.038095238095, 0.037914691943, 0.037735849057, 0.037558685446, 0.03738317757, 0.037209302326, 0.037037037037, 0.036866359447, 0.036697247706, 0.036529680365, 0.036363636364, 0.036199095023, 0.036036036036, 0.035874439462, 0.035714285714, 0.035555555556, 0.035398230088, 0.035242290749, 0.035087719298, 0.034934497817, 0.034782608696, 0.034632034632, 0.034482758621, 0.034334763948, 0.034188034188, 0.034042553191, 0.033898305085, 0.033755274262, 0.033613445378, 0.033472803347, 0.033333333333, 0.033195020747, 0.03305785124, 0.0329218107, 0.032786885246, 0.032653061224, 0.032520325203, 0.032388663968, 0.032258064516, 0.032128514056, 0.032, 0.03187250996, 0.031746031746, 0.03162055336, 0.031496062992, 0.03137254902, 0.03125, 0.031128404669, 0.031007751938, 0.030888030888, 0.030769230769, 0.030651340996, 0.030534351145, 0.030418250951, 0.030303030303, 0.030188679245, 0.03007518797, 0.029962546816, 0.029850746269, 0.029739776952, 0.02962962963, 0.029520295203, 0.029411764706, 0.029304029304, 0.029197080292, 0.029090909091, 0.028985507246, 0.028880866426, 0.028776978417, 0.028673835125, 0.028571428571, 0.02846975089, 0.028368794326, 0.028268551237, 0.028169014085, 0.028070175439, 0.027972027972, 0.02787456446, 0.027777777778, 0.0276816609, 
0.027586206897, 0.027491408935, 0.027397260274, 0.027303754266, 0.027210884354, 0.027118644068, 0.027027027027, 0.026936026936, 0.026845637584, 0.026755852843, 0.026666666667, 0.02657807309, 0.026490066225, 0.026402640264, 0.026315789474, 0.026229508197, 0.02614379085, 0.026058631922, 0.025974025974, 0.025889967638, 0.025806451613, 0.025723472669, 0.025641025641, 0.025559105431, 0.025477707006, 0.025396825397, 0.025316455696, 0.02523659306, 0.025157232704, 0.025078369906, 0.025, 0.02492211838, 0.024844720497, 0.024767801858, 0.024691358025, 0.024615384615, 0.024539877301, 0.024464831804, 0.024390243902, 0.024316109422, 0.024242424242, 0.02416918429, 0.024096385542, 0.024024024024, 0.023952095808, 0.023880597015, 0.02380952381, 0.023738872404, 0.023668639053, 0.023598820059, 0.023529411765, 0.023460410557, 0.023391812865, 0.02332361516, 0.023255813953, 0.023188405797, 0.023121387283, 0.023054755043, 0.022988505747, 0.022922636103, 0.022857142857, 0.022792022792, 0.022727272727, 0.022662889518, 0.022598870056, 0.022535211268, 0.022471910112, 0.022408963585, 0.022346368715, 0.022284122563, 0.022222222222, 0.02216066482, 0.022099447514, 0.022038567493, 0.021978021978, 0.021917808219, 0.021857923497, 0.021798365123, 0.021739130435, 0.021680216802, 0.021621621622, 0.021563342318, 0.021505376344, 0.02144772118, 0.021390374332, 0.021333333333, 0.021276595745, 0.021220159151, 0.021164021164, 0.02110817942, 0.021052631579, 0.020997375328, 0.020942408377, 0.02088772846, 0.020833333333, 0.020779220779, 0.020725388601, 0.020671834625, 0.020618556701, 0.020565552699, 0.020512820513, 0.020460358056, 0.020408163265, 0.020356234097, 0.020304568528, 0.020253164557, 0.020202020202, 0.020151133501, 0.020100502513, 0.020050125313, 0.02, 0.019950124688, 0.019900497512, 0.019851116625, 0.019801980198, 0.01975308642, 0.019704433498, 0.019656019656, 0.019607843137, 0.0195599022, 0.019512195122, 0.019464720195, 0.019417475728, 0.019370460048, 0.019323671498, 0.019277108434, 0.019230769231, 
0.019184652278, 0.019138755981, 0.019093078759, 0.019047619048, 0.019002375297, 0.018957345972, 0.018912529551, 0.018867924528, 0.018823529412, 0.018779342723, 0.018735362998, 0.018691588785, 0.018648018648, 0.018604651163, 0.018561484919, 0.018518518519, 0.018475750577, 0.018433179724, 0.018390804598, 0.018348623853, 0.018306636156, 0.018264840183, 0.018223234624, 0.018181818182, 0.018140589569, 0.018099547511, 0.018058690745, 0.018018018018, 0.01797752809, 0.017937219731, 0.017897091723, 0.017857142857, 0.017817371938, 0.017777777778, 0.017738359202, 0.017699115044, 0.01766004415, 0.017621145374, 0.017582417582, 0.017543859649, 0.01750547046, 0.017467248908, 0.0174291939, 0.017391304348, 0.017353579176, 0.017316017316, 0.017278617711, 0.01724137931, 0.017204301075, 0.017167381974, 0.017130620985, 0.017094017094, 0.017057569296, 0.017021276596, 0.016985138004, 0.016949152542, 0.016913319239, 0.016877637131, 0.016842105263, 0.016806722689, 0.01677148847, 0.016736401674, 0.016701461378, 0.016666666667, 0.016632016632, 0.016597510373, 0.016563146998, 0.01652892562, 0.016494845361, 0.01646090535, 0.016427104723, 0.016393442623, 0.0163599182, 0.016326530612, 0.016293279022, 0.016260162602, 0.016227180527, 0.016194331984, 0.016161616162, 0.016129032258, 0.016096579477, 0.016064257028, 0.016032064128, 0.016, 0.015968063872, 0.01593625498, 0.015904572565, 0.015873015873, 0.015841584158, 0.01581027668, 0.015779092702, 0.015748031496, 0.015717092338, 0.01568627451, 0.015655577299, 0.015625, 0.01559454191, 0.015564202335, 0.015533980583, 0.015503875969, 0.015473887814, 0.015444015444, 0.015414258189, 0.015384615385, 0.015355086372, 0.015325670498, 0.015296367113, 0.015267175573, 0.015238095238, 0.015209125475, 0.015180265655, 0.015151515152, 0.015122873346, 0.015094339623, 0.015065913371, 0.015037593985, 0.015009380863, 0.014981273408, 0.014953271028, 0.014925373134, 0.014897579143, 0.014869888476, 0.014842300557, 0.014814814815, 0.014787430684, 0.014760147601, 
0.014732965009, 0.014705882353, 0.014678899083, 0.014652014652, 0.014625228519, 0.014598540146, 0.014571948998, 0.014545454545, 0.014519056261, 0.014492753623, 0.014466546112, 0.014440433213, 0.014414414414, 0.014388489209, 0.014362657092, 0.014336917563, 0.014311270125, 0.014285714286, 0.014260249554, 0.014234875445, 0.014209591474, 0.014184397163, 0.014159292035, 0.014134275618, 0.014109347443, 0.014084507042, 0.014059753954, 0.014035087719, 0.014010507881, 0.013986013986, 0.013961605585, 0.01393728223, 0.013913043478, 0.013888888889, 0.013864818024, 0.01384083045, 0.013816925734, 0.013793103448, 0.013769363167, 0.013745704467, 0.01372212693, 0.013698630137, 0.013675213675, 0.013651877133, 0.013628620102, 0.013605442177, 0.013582342954, 0.013559322034, 0.013536379019, 0.013513513514, 0.013490725126, 0.013468013468, 0.013445378151, 0.013422818792, 0.013400335008, 0.013377926421, 0.013355592654, 0.013333333333, 0.013311148087, 0.013289036545, 0.013266998342, 0.013245033113, 0.013223140496, 0.013201320132, 0.013179571664, 0.013157894737, 0.013136288998, 0.013114754098, 0.013093289689, 0.013071895425, 0.013050570962, 0.013029315961, 0.013008130081, 0.012987012987, 0.012965964344, 0.012944983819, 0.012924071082, 0.012903225806, 0.012882447665, 0.012861736334, 0.012841091493, 0.012820512821, 0.0128, 0.012779552716, 0.012759170654, 0.012738853503, 0.012718600954, 0.012698412698, 0.012678288431, 0.012658227848, 0.012638230648, 0.01261829653, 0.012598425197, 0.012578616352, 0.012558869702, 0.012539184953, 0.012519561815, 0.0125, 0.01248049922, 0.01246105919, 0.012441679627, 0.012422360248, 0.012403100775, 0.012383900929, 0.012364760433, 0.012345679012, 0.012326656394, 0.012307692308, 0.012288786482, 0.01226993865, 0.012251148545, 0.012232415902, 0.012213740458, 0.012195121951, 0.012176560122, 0.012158054711, 0.012139605463, 0.012121212121, 0.012102874433, 0.012084592145, 0.012066365008, 0.012048192771, 0.012030075188, 0.012012012012, 0.011994002999, 0.011976047904, 
0.011958146487, 0.011940298507, 0.011922503726, 0.011904761905, 0.011887072808, 0.011869436202, 0.011851851852, 0.011834319527, 0.011816838996, 0.011799410029, 0.011782032401, 0.011764705882, 0.01174743025, 0.011730205279, 0.011713030747, 0.011695906433, 0.011678832117, 0.01166180758, 0.011644832606, 0.011627906977, 0.011611030479, 0.011594202899, 0.011577424023, 0.011560693642, 0.011544011544, 0.011527377522, 0.011510791367, 0.011494252874, 0.011477761836, 0.011461318052, 0.011444921316, 0.011428571429, 0.011412268188, 0.011396011396, 0.011379800853, 0.011363636364, 0.01134751773, 0.011331444759, 0.011315417256, 0.011299435028, 0.011283497884, 0.011267605634, 0.011251758087, 0.011235955056, 0.011220196353, 0.011204481793, 0.011188811189, 0.011173184358, 0.011157601116, 0.011142061281, 0.011126564673, 0.011111111111, 0.011095700416, 0.01108033241, 0.011065006916, 0.011049723757, 0.011034482759, 0.011019283747, 0.011004126547, 0.010989010989, 0.0109739369, 0.01095890411, 0.010943912449, 0.010928961749, 0.010914051842, 0.010899182561, 0.010884353741, 0.010869565217, 0.010854816825, 0.010840108401, 0.010825439783, 0.010810810811, 0.010796221323, 0.010781671159, 0.010767160162, 0.010752688172, 0.010738255034, 0.01072386059, 0.010709504685, 0.010695187166, 0.010680907877, 0.010666666667, 0.010652463382, 0.010638297872, 0.010624169987, 0.010610079576, 0.01059602649, 0.010582010582, 0.010568031704, 0.01055408971, 0.010540184453, 0.010526315789, 0.010512483574, 0.010498687664, 0.010484927916, 0.010471204188, 0.01045751634, 0.01044386423, 0.010430247718, 0.010416666667, 0.010403120936, 0.01038961039, 0.01037613489, 0.010362694301, 0.010349288486, 0.010335917313, 0.010322580645, 0.010309278351, 0.010296010296, 0.01028277635, 0.01026957638, 0.010256410256, 0.010243277849, 0.010230179028, 0.010217113665, 0.010204081633, 0.010191082803, 0.010178117048, 0.010165184244, 0.010152284264, 0.010139416984, 0.010126582278, 0.010113780025, 0.010101010101, 0.010088272383, 0.010075566751, 
0.010062893082, 0.010050251256, 0.010037641154, 0.010025062657, 0.010012515645, 0.01, 0.009987515605, 0.009975062344, 0.0099626401, 0.009950248756, 0.009937888199, 0.009925558313, 0.009913258984, 0.009900990099, 0.009888751545, 0.00987654321, 0.009864364982, 0.009852216749, 0.009840098401, 0.009828009828, 0.00981595092, 0.009803921569, 0.009791921665, 0.0097799511, 0.009768009768, 0.009756097561, 0.009744214373, 0.009732360097, 0.009720534629, 0.009708737864, 0.009696969697, 0.009685230024, 0.009673518742, 0.009661835749, 0.009650180941, 0.009638554217, 0.009626955475, 0.009615384615, 0.009603841537, 0.009592326139, 0.009580838323, 0.00956937799, 0.009557945042, 0.009546539379, 0.009535160906, 0.009523809524, 0.009512485137, 0.009501187648, 0.009489916963, 0.009478672986, 0.009467455621, 0.009456264775, 0.009445100354, 0.009433962264, 0.009422850412, 0.009411764706, 0.009400705053, 0.009389671362, 0.00937866354, 0.009367681499, 0.009356725146, 0.009345794393, 0.009334889148, 0.009324009324, 0.009313154831, 0.009302325581, 0.009291521487, 0.009280742459, 0.009269988413, 0.009259259259, 0.009248554913, 0.009237875289, 0.0092272203, 0.009216589862, 0.00920598389, 0.009195402299, 0.009184845006, 0.009174311927, 0.009163802978, 0.009153318078, 0.009142857143, 0.009132420091, 0.009122006842, 0.009111617312, 0.009101251422, 0.009090909091, 0.009080590238, 0.009070294785, 0.00906002265, 0.009049773756, 0.009039548023, 0.009029345372, 0.009019165727, 0.009009009009, 0.008998875141, 0.008988764045, 0.008978675645, 0.008968609865, 0.008958566629, 0.008948545861, 0.008938547486, 0.008928571429, 0.008918617614, 0.008908685969, 0.008898776418, 0.008888888889, 0.008879023307, 0.008869179601, 0.008859357697, 0.008849557522, 0.008839779006, 0.008830022075, 0.008820286659, 0.008810572687, 0.008800880088, 0.008791208791, 0.008781558727, 0.008771929825, 0.008762322015, 0.00875273523, 0.008743169399, 0.008733624454, 0.008724100327, 0.00871459695, 0.008705114255, 0.008695652174, 
0.008686210641, 0.008676789588, 0.008667388949, 0.008658008658, 0.008648648649, 0.008639308855, 0.008629989213, 0.008620689655, 0.008611410118, 0.008602150538, 0.008592910849, 0.008583690987, 0.00857449089, 0.008565310493, 0.008556149733, 0.008547008547, 0.008537886873, 0.008528784648, 0.00851970181, 0.008510638298, 0.008501594049, 0.008492569002, 0.008483563097, 0.008474576271, 0.008465608466, 0.008456659619, 0.008447729673, 0.008438818565, 0.008429926238, 0.008421052632, 0.008412197687, 0.008403361345, 0.008394543547, 0.008385744235, 0.008376963351, 0.008368200837, 0.008359456635, 0.008350730689, 0.008342022941, 0.008333333333, 0.008324661811, 0.008316008316, 0.008307372793, 0.008298755187, 0.00829015544, 0.008281573499, 0.008273009307, 0.00826446281, 0.008255933953, 0.00824742268, 0.008238928939, 0.008230452675, 0.008221993834, 0.008213552361, 0.008205128205, 0.008196721311, 0.008188331627, 0.0081799591, 0.008171603677, 0.008163265306, 0.008154943935, 0.008146639511, 0.008138351984, 0.008130081301, 0.008121827411, 0.008113590264, 0.008105369807, 0.008097165992, 0.008088978766, 0.008080808081, 0.008072653885, 0.008064516129, 0.008056394763, 0.008048289738, 0.008040201005, 0.008032128514, 0.008024072217, 0.008016032064, 0.008008008008, 0.008, 0.007992007992, 0.007984031936, 0.007976071785, 0.00796812749, 0.007960199005, 0.007952286282, 0.007944389275, 0.007936507937, 0.00792864222, 0.007920792079, 0.007912957468, 0.00790513834, 0.00789733465, 0.007889546351, 0.007881773399, 0.007874015748, 0.007866273353, 0.007858546169, 0.007850834151, 0.007843137255, 0.007835455436, 0.00782778865, 0.007820136852}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb new file mode 100644 index 0000000000..8de7708ed4 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_latency.tb @@ 
-0,0 +1 @@ +static const typename CONFIG_T::table_t invert_table_latency[1024] = {1.7976931348623157e+308, 4.0, 2.0, 1.333333333333, 1.0, 0.8, 0.666666666667, 0.571428571429, 0.5, 0.444444444444, 0.4, 0.363636363636, 0.333333333333, 0.307692307692, 0.285714285714, 0.266666666667, 0.25, 0.235294117647, 0.222222222222, 0.210526315789, 0.2, 0.190476190476, 0.181818181818, 0.173913043478, 0.166666666667, 0.16, 0.153846153846, 0.148148148148, 0.142857142857, 0.137931034483, 0.133333333333, 0.129032258065, 0.125, 0.121212121212, 0.117647058824, 0.114285714286, 0.111111111111, 0.108108108108, 0.105263157895, 0.102564102564, 0.1, 0.09756097561, 0.095238095238, 0.093023255814, 0.090909090909, 0.088888888889, 0.086956521739, 0.085106382979, 0.083333333333, 0.081632653061, 0.08, 0.078431372549, 0.076923076923, 0.075471698113, 0.074074074074, 0.072727272727, 0.071428571429, 0.070175438596, 0.068965517241, 0.067796610169, 0.066666666667, 0.065573770492, 0.064516129032, 0.063492063492, 0.0625, 0.061538461538, 0.060606060606, 0.059701492537, 0.058823529412, 0.057971014493, 0.057142857143, 0.056338028169, 0.055555555556, 0.054794520548, 0.054054054054, 0.053333333333, 0.052631578947, 0.051948051948, 0.051282051282, 0.050632911392, 0.05, 0.049382716049, 0.048780487805, 0.048192771084, 0.047619047619, 0.047058823529, 0.046511627907, 0.045977011494, 0.045454545455, 0.044943820225, 0.044444444444, 0.043956043956, 0.04347826087, 0.043010752688, 0.042553191489, 0.042105263158, 0.041666666667, 0.041237113402, 0.040816326531, 0.040404040404, 0.04, 0.039603960396, 0.039215686275, 0.038834951456, 0.038461538462, 0.038095238095, 0.037735849057, 0.03738317757, 0.037037037037, 0.036697247706, 0.036363636364, 0.036036036036, 0.035714285714, 0.035398230088, 0.035087719298, 0.034782608696, 0.034482758621, 0.034188034188, 0.033898305085, 0.033613445378, 0.033333333333, 0.03305785124, 0.032786885246, 0.032520325203, 0.032258064516, 0.032, 0.031746031746, 0.031496062992, 0.03125, 0.031007751938, 
0.030769230769, 0.030534351145, 0.030303030303, 0.03007518797, 0.029850746269, 0.02962962963, 0.029411764706, 0.029197080292, 0.028985507246, 0.028776978417, 0.028571428571, 0.028368794326, 0.028169014085, 0.027972027972, 0.027777777778, 0.027586206897, 0.027397260274, 0.027210884354, 0.027027027027, 0.026845637584, 0.026666666667, 0.026490066225, 0.026315789474, 0.02614379085, 0.025974025974, 0.025806451613, 0.025641025641, 0.025477707006, 0.025316455696, 0.025157232704, 0.025, 0.024844720497, 0.024691358025, 0.024539877301, 0.024390243902, 0.024242424242, 0.024096385542, 0.023952095808, 0.02380952381, 0.023668639053, 0.023529411765, 0.023391812865, 0.023255813953, 0.023121387283, 0.022988505747, 0.022857142857, 0.022727272727, 0.022598870056, 0.022471910112, 0.022346368715, 0.022222222222, 0.022099447514, 0.021978021978, 0.021857923497, 0.021739130435, 0.021621621622, 0.021505376344, 0.021390374332, 0.021276595745, 0.021164021164, 0.021052631579, 0.020942408377, 0.020833333333, 0.020725388601, 0.020618556701, 0.020512820513, 0.020408163265, 0.020304568528, 0.020202020202, 0.020100502513, 0.02, 0.019900497512, 0.019801980198, 0.019704433498, 0.019607843137, 0.019512195122, 0.019417475728, 0.019323671498, 0.019230769231, 0.019138755981, 0.019047619048, 0.018957345972, 0.018867924528, 0.018779342723, 0.018691588785, 0.018604651163, 0.018518518519, 0.018433179724, 0.018348623853, 0.018264840183, 0.018181818182, 0.018099547511, 0.018018018018, 0.017937219731, 0.017857142857, 0.017777777778, 0.017699115044, 0.017621145374, 0.017543859649, 0.017467248908, 0.017391304348, 0.017316017316, 0.01724137931, 0.017167381974, 0.017094017094, 0.017021276596, 0.016949152542, 0.016877637131, 0.016806722689, 0.016736401674, 0.016666666667, 0.016597510373, 0.01652892562, 0.01646090535, 0.016393442623, 0.016326530612, 0.016260162602, 0.016194331984, 0.016129032258, 0.016064257028, 0.016, 0.01593625498, 0.015873015873, 0.01581027668, 0.015748031496, 0.01568627451, 0.015625, 
0.015564202335, 0.015503875969, 0.015444015444, 0.015384615385, 0.015325670498, 0.015267175573, 0.015209125475, 0.015151515152, 0.015094339623, 0.015037593985, 0.014981273408, 0.014925373134, 0.014869888476, 0.014814814815, 0.014760147601, 0.014705882353, 0.014652014652, 0.014598540146, 0.014545454545, 0.014492753623, 0.014440433213, 0.014388489209, 0.014336917563, 0.014285714286, 0.014234875445, 0.014184397163, 0.014134275618, 0.014084507042, 0.014035087719, 0.013986013986, 0.01393728223, 0.013888888889, 0.01384083045, 0.013793103448, 0.013745704467, 0.013698630137, 0.013651877133, 0.013605442177, 0.013559322034, 0.013513513514, 0.013468013468, 0.013422818792, 0.013377926421, 0.013333333333, 0.013289036545, 0.013245033113, 0.013201320132, 0.013157894737, 0.013114754098, 0.013071895425, 0.013029315961, 0.012987012987, 0.012944983819, 0.012903225806, 0.012861736334, 0.012820512821, 0.012779552716, 0.012738853503, 0.012698412698, 0.012658227848, 0.01261829653, 0.012578616352, 0.012539184953, 0.0125, 0.01246105919, 0.012422360248, 0.012383900929, 0.012345679012, 0.012307692308, 0.01226993865, 0.012232415902, 0.012195121951, 0.012158054711, 0.012121212121, 0.012084592145, 0.012048192771, 0.012012012012, 0.011976047904, 0.011940298507, 0.011904761905, 0.011869436202, 0.011834319527, 0.011799410029, 0.011764705882, 0.011730205279, 0.011695906433, 0.01166180758, 0.011627906977, 0.011594202899, 0.011560693642, 0.011527377522, 0.011494252874, 0.011461318052, 0.011428571429, 0.011396011396, 0.011363636364, 0.011331444759, 0.011299435028, 0.011267605634, 0.011235955056, 0.011204481793, 0.011173184358, 0.011142061281, 0.011111111111, 0.01108033241, 0.011049723757, 0.011019283747, 0.010989010989, 0.01095890411, 0.010928961749, 0.010899182561, 0.010869565217, 0.010840108401, 0.010810810811, 0.010781671159, 0.010752688172, 0.01072386059, 0.010695187166, 0.010666666667, 0.010638297872, 0.010610079576, 0.010582010582, 0.01055408971, 0.010526315789, 0.010498687664, 0.010471204188, 
0.01044386423, 0.010416666667, 0.01038961039, 0.010362694301, 0.010335917313, 0.010309278351, 0.01028277635, 0.010256410256, 0.010230179028, 0.010204081633, 0.010178117048, 0.010152284264, 0.010126582278, 0.010101010101, 0.010075566751, 0.010050251256, 0.010025062657, 0.01, 0.009975062344, 0.009950248756, 0.009925558313, 0.009900990099, 0.00987654321, 0.009852216749, 0.009828009828, 0.009803921569, 0.0097799511, 0.009756097561, 0.009732360097, 0.009708737864, 0.009685230024, 0.009661835749, 0.009638554217, 0.009615384615, 0.009592326139, 0.00956937799, 0.009546539379, 0.009523809524, 0.009501187648, 0.009478672986, 0.009456264775, 0.009433962264, 0.009411764706, 0.009389671362, 0.009367681499, 0.009345794393, 0.009324009324, 0.009302325581, 0.009280742459, 0.009259259259, 0.009237875289, 0.009216589862, 0.009195402299, 0.009174311927, 0.009153318078, 0.009132420091, 0.009111617312, 0.009090909091, 0.009070294785, 0.009049773756, 0.009029345372, 0.009009009009, 0.008988764045, 0.008968609865, 0.008948545861, 0.008928571429, 0.008908685969, 0.008888888889, 0.008869179601, 0.008849557522, 0.008830022075, 0.008810572687, 0.008791208791, 0.008771929825, 0.00875273523, 0.008733624454, 0.00871459695, 0.008695652174, 0.008676789588, 0.008658008658, 0.008639308855, 0.008620689655, 0.008602150538, 0.008583690987, 0.008565310493, 0.008547008547, 0.008528784648, 0.008510638298, 0.008492569002, 0.008474576271, 0.008456659619, 0.008438818565, 0.008421052632, 0.008403361345, 0.008385744235, 0.008368200837, 0.008350730689, 0.008333333333, 0.008316008316, 0.008298755187, 0.008281573499, 0.00826446281, 0.00824742268, 0.008230452675, 0.008213552361, 0.008196721311, 0.0081799591, 0.008163265306, 0.008146639511, 0.008130081301, 0.008113590264, 0.008097165992, 0.008080808081, 0.008064516129, 0.008048289738, 0.008032128514, 0.008016032064, 0.008, 0.007984031936, 0.00796812749, 0.007952286282, 0.007936507937, 0.007920792079, 0.00790513834, 0.007889546351, 0.007874015748, 0.007858546169, 
0.007843137255, 0.00782778865, -0.0078125, -0.00782778865, -0.007843137255, -0.007858546169, -0.007874015748, -0.007889546351, -0.00790513834, -0.007920792079, -0.007936507937, -0.007952286282, -0.00796812749, -0.007984031936, -0.008, -0.008016032064, -0.008032128514, -0.008048289738, -0.008064516129, -0.008080808081, -0.008097165992, -0.008113590264, -0.008130081301, -0.008146639511, -0.008163265306, -0.0081799591, -0.008196721311, -0.008213552361, -0.008230452675, -0.00824742268, -0.00826446281, -0.008281573499, -0.008298755187, -0.008316008316, -0.008333333333, -0.008350730689, -0.008368200837, -0.008385744235, -0.008403361345, -0.008421052632, -0.008438818565, -0.008456659619, -0.008474576271, -0.008492569002, -0.008510638298, -0.008528784648, -0.008547008547, -0.008565310493, -0.008583690987, -0.008602150538, -0.008620689655, -0.008639308855, -0.008658008658, -0.008676789588, -0.008695652174, -0.00871459695, -0.008733624454, -0.00875273523, -0.008771929825, -0.008791208791, -0.008810572687, -0.008830022075, -0.008849557522, -0.008869179601, -0.008888888889, -0.008908685969, -0.008928571429, -0.008948545861, -0.008968609865, -0.008988764045, -0.009009009009, -0.009029345372, -0.009049773756, -0.009070294785, -0.009090909091, -0.009111617312, -0.009132420091, -0.009153318078, -0.009174311927, -0.009195402299, -0.009216589862, -0.009237875289, -0.009259259259, -0.009280742459, -0.009302325581, -0.009324009324, -0.009345794393, -0.009367681499, -0.009389671362, -0.009411764706, -0.009433962264, -0.009456264775, -0.009478672986, -0.009501187648, -0.009523809524, -0.009546539379, -0.00956937799, -0.009592326139, -0.009615384615, -0.009638554217, -0.009661835749, -0.009685230024, -0.009708737864, -0.009732360097, -0.009756097561, -0.0097799511, -0.009803921569, -0.009828009828, -0.009852216749, -0.00987654321, -0.009900990099, -0.009925558313, -0.009950248756, -0.009975062344, -0.01, -0.010025062657, -0.010050251256, -0.010075566751, -0.010101010101, -0.010126582278, 
-0.010152284264, -0.010178117048, -0.010204081633, -0.010230179028, -0.010256410256, -0.01028277635, -0.010309278351, -0.010335917313, -0.010362694301, -0.01038961039, -0.010416666667, -0.01044386423, -0.010471204188, -0.010498687664, -0.010526315789, -0.01055408971, -0.010582010582, -0.010610079576, -0.010638297872, -0.010666666667, -0.010695187166, -0.01072386059, -0.010752688172, -0.010781671159, -0.010810810811, -0.010840108401, -0.010869565217, -0.010899182561, -0.010928961749, -0.01095890411, -0.010989010989, -0.011019283747, -0.011049723757, -0.01108033241, -0.011111111111, -0.011142061281, -0.011173184358, -0.011204481793, -0.011235955056, -0.011267605634, -0.011299435028, -0.011331444759, -0.011363636364, -0.011396011396, -0.011428571429, -0.011461318052, -0.011494252874, -0.011527377522, -0.011560693642, -0.011594202899, -0.011627906977, -0.01166180758, -0.011695906433, -0.011730205279, -0.011764705882, -0.011799410029, -0.011834319527, -0.011869436202, -0.011904761905, -0.011940298507, -0.011976047904, -0.012012012012, -0.012048192771, -0.012084592145, -0.012121212121, -0.012158054711, -0.012195121951, -0.012232415902, -0.01226993865, -0.012307692308, -0.012345679012, -0.012383900929, -0.012422360248, -0.01246105919, -0.0125, -0.012539184953, -0.012578616352, -0.01261829653, -0.012658227848, -0.012698412698, -0.012738853503, -0.012779552716, -0.012820512821, -0.012861736334, -0.012903225806, -0.012944983819, -0.012987012987, -0.013029315961, -0.013071895425, -0.013114754098, -0.013157894737, -0.013201320132, -0.013245033113, -0.013289036545, -0.013333333333, -0.013377926421, -0.013422818792, -0.013468013468, -0.013513513514, -0.013559322034, -0.013605442177, -0.013651877133, -0.013698630137, -0.013745704467, -0.013793103448, -0.01384083045, -0.013888888889, -0.01393728223, -0.013986013986, -0.014035087719, -0.014084507042, -0.014134275618, -0.014184397163, -0.014234875445, -0.014285714286, -0.014336917563, -0.014388489209, -0.014440433213, 
-0.014492753623, -0.014545454545, -0.014598540146, -0.014652014652, -0.014705882353, -0.014760147601, -0.014814814815, -0.014869888476, -0.014925373134, -0.014981273408, -0.015037593985, -0.015094339623, -0.015151515152, -0.015209125475, -0.015267175573, -0.015325670498, -0.015384615385, -0.015444015444, -0.015503875969, -0.015564202335, -0.015625, -0.01568627451, -0.015748031496, -0.01581027668, -0.015873015873, -0.01593625498, -0.016, -0.016064257028, -0.016129032258, -0.016194331984, -0.016260162602, -0.016326530612, -0.016393442623, -0.01646090535, -0.01652892562, -0.016597510373, -0.016666666667, -0.016736401674, -0.016806722689, -0.016877637131, -0.016949152542, -0.017021276596, -0.017094017094, -0.017167381974, -0.01724137931, -0.017316017316, -0.017391304348, -0.017467248908, -0.017543859649, -0.017621145374, -0.017699115044, -0.017777777778, -0.017857142857, -0.017937219731, -0.018018018018, -0.018099547511, -0.018181818182, -0.018264840183, -0.018348623853, -0.018433179724, -0.018518518519, -0.018604651163, -0.018691588785, -0.018779342723, -0.018867924528, -0.018957345972, -0.019047619048, -0.019138755981, -0.019230769231, -0.019323671498, -0.019417475728, -0.019512195122, -0.019607843137, -0.019704433498, -0.019801980198, -0.019900497512, -0.02, -0.020100502513, -0.020202020202, -0.020304568528, -0.020408163265, -0.020512820513, -0.020618556701, -0.020725388601, -0.020833333333, -0.020942408377, -0.021052631579, -0.021164021164, -0.021276595745, -0.021390374332, -0.021505376344, -0.021621621622, -0.021739130435, -0.021857923497, -0.021978021978, -0.022099447514, -0.022222222222, -0.022346368715, -0.022471910112, -0.022598870056, -0.022727272727, -0.022857142857, -0.022988505747, -0.023121387283, -0.023255813953, -0.023391812865, -0.023529411765, -0.023668639053, -0.02380952381, -0.023952095808, -0.024096385542, -0.024242424242, -0.024390243902, -0.024539877301, -0.024691358025, -0.024844720497, -0.025, -0.025157232704, -0.025316455696, -0.025477707006, 
-0.025641025641, -0.025806451613, -0.025974025974, -0.02614379085, -0.026315789474, -0.026490066225, -0.026666666667, -0.026845637584, -0.027027027027, -0.027210884354, -0.027397260274, -0.027586206897, -0.027777777778, -0.027972027972, -0.028169014085, -0.028368794326, -0.028571428571, -0.028776978417, -0.028985507246, -0.029197080292, -0.029411764706, -0.02962962963, -0.029850746269, -0.03007518797, -0.030303030303, -0.030534351145, -0.030769230769, -0.031007751938, -0.03125, -0.031496062992, -0.031746031746, -0.032, -0.032258064516, -0.032520325203, -0.032786885246, -0.03305785124, -0.033333333333, -0.033613445378, -0.033898305085, -0.034188034188, -0.034482758621, -0.034782608696, -0.035087719298, -0.035398230088, -0.035714285714, -0.036036036036, -0.036363636364, -0.036697247706, -0.037037037037, -0.03738317757, -0.037735849057, -0.038095238095, -0.038461538462, -0.038834951456, -0.039215686275, -0.039603960396, -0.04, -0.040404040404, -0.040816326531, -0.041237113402, -0.041666666667, -0.042105263158, -0.042553191489, -0.043010752688, -0.04347826087, -0.043956043956, -0.044444444444, -0.044943820225, -0.045454545455, -0.045977011494, -0.046511627907, -0.047058823529, -0.047619047619, -0.048192771084, -0.048780487805, -0.049382716049, -0.05, -0.050632911392, -0.051282051282, -0.051948051948, -0.052631578947, -0.053333333333, -0.054054054054, -0.054794520548, -0.055555555556, -0.056338028169, -0.057142857143, -0.057971014493, -0.058823529412, -0.059701492537, -0.060606060606, -0.061538461538, -0.0625, -0.063492063492, -0.064516129032, -0.065573770492, -0.066666666667, -0.067796610169, -0.068965517241, -0.070175438596, -0.071428571429, -0.072727272727, -0.074074074074, -0.075471698113, -0.076923076923, -0.078431372549, -0.08, -0.081632653061, -0.083333333333, -0.085106382979, -0.086956521739, -0.088888888889, -0.090909090909, -0.093023255814, -0.095238095238, -0.09756097561, -0.1, -0.102564102564, -0.105263157895, -0.108108108108, -0.111111111111, 
-0.114285714286, -0.117647058824, -0.121212121212, -0.125, -0.129032258065, -0.133333333333, -0.137931034483, -0.142857142857, -0.148148148148, -0.153846153846, -0.16, -0.166666666667, -0.173913043478, -0.181818181818, -0.190476190476, -0.2, -0.210526315789, -0.222222222222, -0.235294117647, -0.25, -0.266666666667, -0.285714285714, -0.307692307692, -0.333333333333, -0.363636363636, -0.4, -0.444444444444, -0.5, -0.571428571429, -0.666666666667, -0.8, -1.0, -1.333333333333, -2.0, -4.0}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb new file mode 100644 index 0000000000..a113e4f23c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/invert_table_legacy.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t invert_table_legacy[1024] = {0, 16.0, 8.0, 5.333333333333333, 4.0, 3.2, 2.6666666666666665, 2.2857142857142856, 2.0, 1.7777777777777777, 1.6, 1.4545454545454546, 1.3333333333333333, 1.2307692307692308, 1.1428571428571428, 1.0666666666666667, 1.0, 0.9411764705882353, 0.8888888888888888, 0.8421052631578947, 0.8, 0.7619047619047619, 0.7272727272727273, 0.6956521739130435, 0.6666666666666666, 0.64, 0.6153846153846154, 0.5925925925925926, 0.5714285714285714, 0.5517241379310345, 0.5333333333333333, 0.5161290322580645, 0.5, 0.48484848484848486, 0.47058823529411764, 0.45714285714285713, 0.4444444444444444, 0.43243243243243246, 0.42105263157894735, 0.41025641025641024, 0.4, 0.3902439024390244, 0.38095238095238093, 0.37209302325581395, 0.36363636363636365, 0.35555555555555557, 0.34782608695652173, 0.3404255319148936, 0.3333333333333333, 0.32653061224489793, 0.32, 0.3137254901960784, 0.3076923076923077, 0.3018867924528302, 0.2962962962962963, 0.2909090909090909, 0.2857142857142857, 0.2807017543859649, 0.27586206896551724, 0.2711864406779661, 0.26666666666666666, 0.26229508196721313, 
0.25806451612903225, 0.25396825396825395, 0.25, 0.24615384615384617, 0.24242424242424243, 0.23880597014925373, 0.23529411764705882, 0.2318840579710145, 0.22857142857142856, 0.22535211267605634, 0.2222222222222222, 0.2191780821917808, 0.21621621621621623, 0.21333333333333335, 0.21052631578947367, 0.2077922077922078, 0.20512820512820512, 0.20253164556962025, 0.2, 0.19753086419753085, 0.1951219512195122, 0.1927710843373494, 0.19047619047619047, 0.18823529411764706, 0.18604651162790697, 0.1839080459770115, 0.18181818181818182, 0.1797752808988764, 0.17777777777777778, 0.17582417582417584, 0.17391304347826086, 0.17204301075268819, 0.1702127659574468, 0.16842105263157894, 0.16666666666666666, 0.16494845360824742, 0.16326530612244897, 0.16161616161616163, 0.16, 0.15841584158415842, 0.1568627450980392, 0.1553398058252427, 0.15384615384615385, 0.1523809523809524, 0.1509433962264151, 0.14953271028037382, 0.14814814814814814, 0.14678899082568808, 0.14545454545454545, 0.14414414414414414, 0.14285714285714285, 0.1415929203539823, 0.14035087719298245, 0.1391304347826087, 0.13793103448275862, 0.13675213675213677, 0.13559322033898305, 0.13445378151260504, 0.13333333333333333, 0.1322314049586777, 0.13114754098360656, 0.13008130081300814, 0.12903225806451613, 0.128, 0.12698412698412698, 0.12598425196850394, 0.125, 0.12403100775193798, 0.12307692307692308, 0.12213740458015267, 0.12121212121212122, 0.12030075187969924, 0.11940298507462686, 0.11851851851851852, 0.11764705882352941, 0.11678832116788321, 0.11594202898550725, 0.11510791366906475, 0.11428571428571428, 0.11347517730496454, 0.11267605633802817, 0.11188811188811189, 0.1111111111111111, 0.1103448275862069, 0.1095890410958904, 0.10884353741496598, 0.10810810810810811, 0.10738255033557047, 0.10666666666666667, 0.10596026490066225, 0.10526315789473684, 0.10457516339869281, 0.1038961038961039, 0.1032258064516129, 0.10256410256410256, 0.10191082802547771, 0.10126582278481013, 0.10062893081761007, 0.1, 0.09937888198757763, 
0.09876543209876543, 0.09815950920245399, 0.0975609756097561, 0.09696969696969697, 0.0963855421686747, 0.09580838323353294, 0.09523809523809523, 0.09467455621301775, 0.09411764705882353, 0.0935672514619883, 0.09302325581395349, 0.09248554913294797, 0.09195402298850575, 0.09142857142857143, 0.09090909090909091, 0.0903954802259887, 0.0898876404494382, 0.0893854748603352, 0.08888888888888889, 0.08839779005524862, 0.08791208791208792, 0.08743169398907104, 0.08695652173913043, 0.08648648648648649, 0.08602150537634409, 0.0855614973262032, 0.0851063829787234, 0.08465608465608465, 0.08421052631578947, 0.08376963350785341, 0.08333333333333333, 0.08290155440414508, 0.08247422680412371, 0.08205128205128205, 0.08163265306122448, 0.08121827411167512, 0.08080808080808081, 0.08040201005025126, 0.08, 0.07960199004975124, 0.07920792079207921, 0.07881773399014778, 0.0784313725490196, 0.07804878048780488, 0.07766990291262135, 0.07729468599033816, 0.07692307692307693, 0.07655502392344497, 0.0761904761904762, 0.07582938388625593, 0.07547169811320754, 0.07511737089201878, 0.07476635514018691, 0.07441860465116279, 0.07407407407407407, 0.07373271889400922, 0.07339449541284404, 0.0730593607305936, 0.07272727272727272, 0.07239819004524888, 0.07207207207207207, 0.07174887892376682, 0.07142857142857142, 0.07111111111111111, 0.07079646017699115, 0.07048458149779736, 0.07017543859649122, 0.06986899563318777, 0.06956521739130435, 0.06926406926406926, 0.06896551724137931, 0.06866952789699571, 0.06837606837606838, 0.06808510638297872, 0.06779661016949153, 0.06751054852320675, 0.06722689075630252, 0.06694560669456066, 0.06666666666666667, 0.06639004149377593, 0.06611570247933884, 0.06584362139917696, 0.06557377049180328, 0.0653061224489796, 0.06504065040650407, 0.06477732793522267, 0.06451612903225806, 0.0642570281124498, 0.064, 0.06374501992031872, 0.06349206349206349, 0.06324110671936758, 0.06299212598425197, 0.06274509803921569, 0.0625, 0.0622568093385214, 0.06201550387596899, 
0.06177606177606178, 0.06153846153846154, 0.06130268199233716, 0.061068702290076333, 0.060836501901140684, 0.06060606060606061, 0.06037735849056604, 0.06015037593984962, 0.0599250936329588, 0.05970149253731343, 0.05947955390334572, 0.05925925925925926, 0.05904059040590406, 0.058823529411764705, 0.05860805860805861, 0.058394160583941604, 0.05818181818181818, 0.057971014492753624, 0.05776173285198556, 0.05755395683453238, 0.05734767025089606, 0.05714285714285714, 0.05693950177935943, 0.05673758865248227, 0.05653710247349823, 0.056338028169014086, 0.056140350877192984, 0.055944055944055944, 0.05574912891986063, 0.05555555555555555, 0.05536332179930796, 0.05517241379310345, 0.054982817869415807, 0.0547945205479452, 0.05460750853242321, 0.05442176870748299, 0.05423728813559322, 0.05405405405405406, 0.05387205387205387, 0.053691275167785234, 0.05351170568561873, 0.05333333333333334, 0.053156146179401995, 0.052980132450331126, 0.052805280528052806, 0.05263157894736842, 0.05245901639344262, 0.05228758169934641, 0.05211726384364821, 0.05194805194805195, 0.05177993527508091, 0.05161290322580645, 0.05144694533762058, 0.05128205128205128, 0.051118210862619806, 0.050955414012738856, 0.050793650793650794, 0.05063291139240506, 0.050473186119873815, 0.050314465408805034, 0.050156739811912224, 0.05, 0.04984423676012461, 0.049689440993788817, 0.04953560371517028, 0.04938271604938271, 0.04923076923076923, 0.049079754601226995, 0.04892966360856269, 0.04878048780487805, 0.0486322188449848, 0.048484848484848485, 0.04833836858006042, 0.04819277108433735, 0.04804804804804805, 0.04790419161676647, 0.04776119402985075, 0.047619047619047616, 0.04747774480712166, 0.047337278106508875, 0.0471976401179941, 0.047058823529411764, 0.0469208211143695, 0.04678362573099415, 0.04664723032069971, 0.046511627906976744, 0.0463768115942029, 0.046242774566473986, 0.04610951008645533, 0.04597701149425287, 0.045845272206303724, 0.045714285714285714, 0.045584045584045586, 0.045454545454545456, 
0.0453257790368272, 0.04519774011299435, 0.04507042253521127, 0.0449438202247191, 0.04481792717086835, 0.0446927374301676, 0.04456824512534819, 0.044444444444444446, 0.0443213296398892, 0.04419889502762431, 0.0440771349862259, 0.04395604395604396, 0.043835616438356165, 0.04371584699453552, 0.043596730245231606, 0.043478260869565216, 0.04336043360433604, 0.043243243243243246, 0.0431266846361186, 0.043010752688172046, 0.04289544235924933, 0.0427807486631016, 0.042666666666666665, 0.0425531914893617, 0.042440318302387266, 0.042328042328042326, 0.04221635883905013, 0.042105263157894736, 0.04199475065616798, 0.041884816753926704, 0.04177545691906005, 0.041666666666666664, 0.04155844155844156, 0.04145077720207254, 0.041343669250646, 0.041237113402061855, 0.04113110539845758, 0.041025641025641026, 0.04092071611253197, 0.04081632653061224, 0.04071246819338423, 0.04060913705583756, 0.04050632911392405, 0.04040404040404041, 0.04030226700251889, 0.04020100502512563, 0.040100250626566414, 0.04, 0.0399002493765586, 0.03980099502487562, 0.03970223325062035, 0.039603960396039604, 0.03950617283950617, 0.03940886699507389, 0.03931203931203931, 0.0392156862745098, 0.039119804400977995, 0.03902439024390244, 0.038929440389294405, 0.038834951456310676, 0.0387409200968523, 0.03864734299516908, 0.03855421686746988, 0.038461538461538464, 0.03836930455635491, 0.03827751196172249, 0.03818615751789976, 0.0380952380952381, 0.03800475059382423, 0.037914691943127965, 0.037825059101654845, 0.03773584905660377, 0.03764705882352941, 0.03755868544600939, 0.03747072599531616, 0.037383177570093455, 0.037296037296037296, 0.037209302325581395, 0.037122969837587005, 0.037037037037037035, 0.03695150115473441, 0.03686635944700461, 0.0367816091954023, 0.03669724770642202, 0.036613272311212815, 0.0365296803652968, 0.03644646924829157, 0.03636363636363636, 0.036281179138321996, 0.03619909502262444, 0.03611738148984198, 0.036036036036036036, 0.035955056179775284, 0.03587443946188341, 0.035794183445190156, 
0.03571428571428571, 0.035634743875278395, 0.035555555555555556, 0.03547671840354767, 0.035398230088495575, 0.03532008830022075, 0.03524229074889868, 0.035164835164835165, 0.03508771929824561, 0.0350109409190372, 0.034934497816593885, 0.034858387799564274, 0.034782608695652174, 0.03470715835140998, 0.03463203463203463, 0.03455723542116631, 0.034482758620689655, 0.034408602150537634, 0.034334763948497854, 0.034261241970021415, 0.03418803418803419, 0.03411513859275053, 0.03404255319148936, 0.03397027600849257, 0.03389830508474576, 0.03382663847780127, 0.03375527426160337, 0.03368421052631579, 0.03361344537815126, 0.033542976939203356, 0.03347280334728033, 0.033402922755741124, 0.03333333333333333, 0.033264033264033266, 0.03319502074688797, 0.033126293995859216, 0.03305785123966942, 0.032989690721649485, 0.03292181069958848, 0.03285420944558522, 0.03278688524590164, 0.032719836400818, 0.0326530612244898, 0.032586558044806514, 0.032520325203252036, 0.032454361054766734, 0.032388663967611336, 0.03232323232323232, 0.03225806451612903, 0.03219315895372234, 0.0321285140562249, 0.03206412825651302, 0.032, 0.031936127744510975, 0.03187250996015936, 0.03180914512922465, 0.031746031746031744, 0.031683168316831684, 0.03162055335968379, 0.03155818540433925, 0.031496062992125984, 0.03143418467583497, 0.03137254901960784, 0.03131115459882583, 0.03125, 0.031189083820662766, 0.0311284046692607, 0.031067961165048542, 0.031007751937984496, 0.030947775628626693, 0.03088803088803089, 0.030828516377649325, 0.03076923076923077, 0.030710172744721688, 0.03065134099616858, 0.030592734225621414, 0.030534351145038167, 0.030476190476190476, 0.030418250950570342, 0.030360531309297913, 0.030303030303030304, 0.030245746691871456, 0.03018867924528302, 0.030131826741996232, 0.03007518796992481, 0.0300187617260788, 0.0299625468164794, 0.029906542056074768, 0.029850746268656716, 0.0297951582867784, 0.02973977695167286, 0.029684601113172542, 0.02962962962962963, 0.029574861367837338, 
0.02952029520295203, 0.029465930018416207, 0.029411764705882353, 0.029357798165137616, 0.029304029304029304, 0.029250457038391225, 0.029197080291970802, 0.029143897996357013, 0.02909090909090909, 0.029038112522686024, 0.028985507246376812, 0.028933092224231464, 0.02888086642599278, 0.02882882882882883, 0.02877697841726619, 0.02872531418312388, 0.02867383512544803, 0.028622540250447227, 0.02857142857142857, 0.0285204991087344, 0.028469750889679714, 0.028419182948490232, 0.028368794326241134, 0.02831858407079646, 0.028268551236749116, 0.02821869488536155, 0.028169014084507043, 0.028119507908611598, 0.028070175438596492, 0.028021015761821366, 0.027972027972027972, 0.027923211169284468, 0.027874564459930314, 0.02782608695652174, 0.027777777777777776, 0.02772963604852686, 0.02768166089965398, 0.027633851468048358, 0.027586206896551724, 0.027538726333907058, 0.027491408934707903, 0.0274442538593482, 0.0273972602739726, 0.02735042735042735, 0.027303754266211604, 0.027257240204429302, 0.027210884353741496, 0.027164685908319185, 0.02711864406779661, 0.02707275803722504, 0.02702702702702703, 0.026981450252951095, 0.026936026936026935, 0.02689075630252101, 0.026845637583892617, 0.02680067001675042, 0.026755852842809364, 0.02671118530884808, 0.02666666666666667, 0.026622296173044926, 0.026578073089700997, 0.026533996683250415, 0.026490066225165563, 0.026446280991735537, 0.026402640264026403, 0.026359143327841845, 0.02631578947368421, 0.026272577996715927, 0.02622950819672131, 0.02618657937806874, 0.026143790849673203, 0.026101141924959218, 0.026058631921824105, 0.026016260162601626, 0.025974025974025976, 0.02593192868719611, 0.025889967637540454, 0.025848142164781908, 0.025806451612903226, 0.02576489533011272, 0.02572347266881029, 0.025682182985553772, 0.02564102564102564, 0.0256, 0.025559105431309903, 0.025518341307814992, 0.025477707006369428, 0.025437201907790145, 0.025396825396825397, 0.025356576862123614, 0.02531645569620253, 0.02527646129541864, 0.025236593059936908, 
0.025196850393700787, 0.025157232704402517, 0.02511773940345369, 0.025078369905956112, 0.025039123630672927, 0.025, 0.0249609984399376, 0.024922118380062305, 0.024883359253499222, 0.024844720496894408, 0.024806201550387597, 0.02476780185758514, 0.02472952086553323, 0.024691358024691357, 0.02465331278890601, 0.024615384615384615, 0.02457757296466974, 0.024539877300613498, 0.02450229709035222, 0.024464831804281346, 0.024427480916030534, 0.024390243902439025, 0.0243531202435312, 0.0243161094224924, 0.024279210925644917, 0.024242424242424242, 0.024205748865355523, 0.02416918429003021, 0.024132730015082957, 0.024096385542168676, 0.02406015037593985, 0.024024024024024024, 0.0239880059970015, 0.023952095808383235, 0.02391629297458894, 0.023880597014925373, 0.02384500745156483, 0.023809523809523808, 0.0237741456166419, 0.02373887240356083, 0.023703703703703703, 0.023668639053254437, 0.023633677991137372, 0.02359882005899705, 0.023564064801178203, 0.023529411764705882, 0.023494860499265784, 0.02346041055718475, 0.02342606149341142, 0.023391812865497075, 0.02335766423357664, 0.023323615160349854, 0.023289665211062592, 0.023255813953488372, 0.023222060957910014, 0.02318840579710145, 0.023154848046309694, 0.023121387283236993, 0.023088023088023088, 0.023054755043227664, 0.02302158273381295, 0.022988505747126436, 0.02295552367288379, 0.022922636103151862, 0.022889842632331903, 0.022857142857142857, 0.02282453637660485, 0.022792022792022793, 0.02275960170697013, 0.022727272727272728, 0.02269503546099291, 0.0226628895184136, 0.02263083451202263, 0.022598870056497175, 0.022566995768688293, 0.022535211267605635, 0.02250351617440225, 0.02247191011235955, 0.02244039270687237, 0.022408963585434174, 0.022377622377622378, 0.0223463687150838, 0.022315202231520222, 0.022284122562674095, 0.022253129346314324, 0.022222222222222223, 0.022191400832177532, 0.0221606648199446, 0.022130013831258646, 0.022099447513812154, 0.022068965517241378, 0.02203856749311295, 0.02200825309491059, 
0.02197802197802198, 0.02194787379972565, 0.021917808219178082, 0.02188782489740082, 0.02185792349726776, 0.021828103683492497, 0.021798365122615803, 0.021768707482993196, 0.021739130434782608, 0.021709633649932156, 0.02168021680216802, 0.02165087956698241, 0.021621621621621623, 0.021592442645074223, 0.0215633423180593, 0.021534320323014805, 0.021505376344086023, 0.021476510067114093, 0.021447721179624665, 0.0214190093708166, 0.0213903743315508, 0.021361815754339118, 0.021333333333333333, 0.02130492676431425, 0.02127659574468085, 0.021248339973439574, 0.021220159151193633, 0.02119205298013245, 0.021164021164021163, 0.021136063408190225, 0.021108179419525065, 0.021080368906455864, 0.021052631578947368, 0.02102496714848883, 0.02099737532808399, 0.020969855832241154, 0.020942408376963352, 0.02091503267973856, 0.020887728459530026, 0.020860495436766623, 0.020833333333333332, 0.02080624187256177, 0.02077922077922078, 0.020752269779507133, 0.02072538860103627, 0.02069857697283312, 0.020671834625323, 0.02064516129032258, 0.020618556701030927, 0.02059202059202059, 0.02056555269922879, 0.02053915275994865, 0.020512820512820513, 0.020486555697823303, 0.020460358056265986, 0.020434227330779056, 0.02040816326530612, 0.02038216560509554, 0.020356234096692113, 0.020330368487928845, 0.02030456852791878, 0.020278833967046894, 0.020253164556962026, 0.020227560050568902, 0.020202020202020204, 0.0201765447667087, 0.020151133501259445, 0.02012578616352201, 0.020100502512562814, 0.020075282308657464, 0.020050125313283207, 0.02002503128911139, 0.02, 0.019975031210986267, 0.0199501246882793, 0.019925280199252802, 0.01990049751243781, 0.01987577639751553, 0.019851116625310174, 0.01982651796778191, 0.019801980198019802, 0.019777503090234856, 0.019753086419753086, 0.01972872996300863, 0.019704433497536946, 0.01968019680196802, 0.019656019656019656, 0.0196319018404908, 0.0196078431372549, 0.019583843329253364, 0.019559902200488997, 0.019536019536019536, 0.01951219512195122, 
0.0194884287454324, 0.019464720194647202, 0.019441069258809233, 0.019417475728155338, 0.019393939393939394, 0.01937046004842615, 0.019347037484885126, 0.01932367149758454, 0.019300361881785282, 0.01927710843373494, 0.019253910950661854, 0.019230769230769232, 0.01920768307322929, 0.019184652278177457, 0.019161676646706587, 0.019138755980861243, 0.019115890083632018, 0.01909307875894988, 0.01907032181168057, 0.01904761904761905, 0.019024970273483946, 0.019002375296912115, 0.018979833926453145, 0.018957345971563982, 0.01893491124260355, 0.018912529550827423, 0.018890200708382526, 0.018867924528301886, 0.01884570082449941, 0.018823529411764704, 0.01880141010575793, 0.018779342723004695, 0.01875732708089097, 0.01873536299765808, 0.01871345029239766, 0.018691588785046728, 0.01866977829638273, 0.018648018648018648, 0.018626309662398137, 0.018604651162790697, 0.018583042973286876, 0.018561484918793503, 0.01853997682502897, 0.018518518518518517, 0.018497109826589597, 0.018475750577367205, 0.01845444059976932, 0.018433179723502304, 0.018411967779056387, 0.01839080459770115, 0.018369690011481057, 0.01834862385321101, 0.018327605956471937, 0.018306636155606407, 0.018285714285714287, 0.0182648401826484, 0.018244013683010263, 0.018223234624145785, 0.01820250284414107, 0.01818181818181818, 0.018161180476730987, 0.018140589569160998, 0.01812004530011325, 0.01809954751131222, 0.01807909604519774, 0.01805869074492099, 0.018038331454340473, 0.018018018018018018, 0.01799775028121485, 0.017977528089887642, 0.017957351290684626, 0.017937219730941704, 0.01791713325867861, 0.017897091722595078, 0.017877094972067038, 0.017857142857142856, 0.017837235228539576, 0.017817371937639197, 0.017797552836484983, 0.017777777777777778, 0.017758046614872364, 0.017738359201773836, 0.017718715393133997, 0.017699115044247787, 0.017679558011049725, 0.017660044150110375, 0.017640573318632856, 0.01762114537444934, 0.0176017601760176, 0.017582417582417582, 0.01756311745334797, 0.017543859649122806, 
0.017524644030668127, 0.0175054704595186, 0.017486338797814208, 0.017467248908296942, 0.017448200654307525, 0.017429193899782137, 0.017410228509249184, 0.017391304347826087, 0.01737242128121607, 0.01735357917570499, 0.01733477789815818, 0.017316017316017316, 0.017297297297297298, 0.017278617710583154, 0.017259978425026967, 0.017241379310344827, 0.017222820236813777, 0.017204301075268817, 0.017185821697099892, 0.017167381974248927, 0.01714898177920686, 0.017130620985010708, 0.017112299465240642, 0.017094017094017096, 0.017075773745997867, 0.017057569296375266, 0.01703940362087327, 0.01702127659574468, 0.01700318809776833, 0.016985138004246284, 0.016967126193001062, 0.01694915254237288, 0.016931216931216932, 0.016913319238900635, 0.01689545934530095, 0.016877637130801686, 0.01685985247629083, 0.016842105263157894, 0.016824395373291272, 0.01680672268907563, 0.016789087093389297, 0.016771488469601678, 0.016753926701570682, 0.016736401673640166, 0.01671891327063741, 0.016701461377870562, 0.016684045881126174, 0.016666666666666666, 0.01664932362122789, 0.016632016632016633, 0.016614745586708203, 0.016597510373443983, 0.016580310880829015, 0.016563146997929608, 0.016546018614270942, 0.01652892561983471, 0.016511867905056758, 0.016494845360824743, 0.016477857878475798, 0.01646090534979424, 0.01644398766700925, 0.01642710472279261, 0.01641025641025641, 0.01639344262295082, 0.016376663254861822, 0.016359918200409, 0.01634320735444331, 0.0163265306122449, 0.0163098878695209, 0.016293279022403257, 0.01627670396744659, 0.016260162601626018, 0.016243654822335026, 0.016227180527383367, 0.016210739614994935, 0.016194331983805668, 0.016177957532861477, 0.01616161616161616, 0.016145307769929364, 0.016129032258064516, 0.016112789526686808, 0.01609657947686117, 0.016080402010050253, 0.01606425702811245, 0.0160481444332999, 0.01603206412825651, 0.016016016016016016, 0.016, 0.015984015984015984, 0.015968063872255488, 0.015952143569292122, 0.01593625498007968, 0.015920398009950248, 
0.015904572564612324, 0.015888778550148957, 0.015873015873015872, 0.015857284440039643, 0.015841584158415842, 0.01582591493570722, 0.015810276679841896, 0.01579466929911155, 0.015779092702169626, 0.015763546798029555, 0.015748031496062992, 0.015732546705998034, 0.015717092337917484, 0.015701668302257114, 0.01568627450980392, 0.015670910871694418, 0.015655577299412915, 0.015640273704789834}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/selu_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/selu_table.tb new file mode 100644 index 0000000000..4b6f1b6ebf --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/selu_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t selu_table[1024] = {0.0, -0.01368163761509927, -0.02725680388157718, -0.04072632736604168, -0.05409103018714518, -0.06735172806576133, -0.0805092303747737, -0.09356434018847654, -0.1065178543315896, -0.11937056342789429, -0.1321232519484884, -0.14477669825966735, -0.1573316746704319, -0.1697889474796268, -0.18214927702271147, -0.1944134177181684, -0.206582118113548, -0.21865612093115805, -0.2306361631133956, -0.2425229758677261, -0.2543172847113135, -0.2660198095153023, -0.2776312645487555, -0.2891523585222498, -0.3005837946311329, -0.3119262705984425, -0.32318047871749306, -0.33434710589412897, -0.3454268336886513, -0.35642033835741715, -0.3673282908941144, -0.3781513570707166, -0.3888901974781189, -0.39954546756645776, -0.4101178176851149, -0.4206078931224147, -0.43101633414500673, -0.44134377603694613, -0.4515908491384689, -0.4617581788844636, -0.47184638584264665, -0.48185608575143773, -0.4917878895575422, -0.5016424034532402, -0.5114202289133869, -0.5211219627321221, -0.5307481970592971, -0.5402995194366167, -0.5497765128334996, -0.5591797556826608, -0.5685098219154158, -0.5777672809967115, -0.5869526979598833, -0.5960666334411424, -0.605109643713794, -0.6140822807221907, 
-0.6229850921154195, -0.6318186212807291, -0.6405834073766941, -0.6492799853661246, -0.6579088860487166, -0.6664706360934506, -0.6749657580707361, -0.6833947704843074, -0.6917581878028713, -0.7000565204915068, -0.7082902750428223, -0.7164599540078693, -0.7245660560268165, -0.7326090758593835, -0.7405895044150393, -0.7485078287829653, -0.7563645322617846, -0.7641600943890607, -0.7718949909705662, -0.7795696941093242, -0.7871846722344218, -0.7947403901296031, -0.8022373089616358, -0.8096758863084598, -0.8170565761871149, -0.8243798290814519, -0.8316460919696298, -0.8388558083513944, -0.84600941827515, -0.8531073583648168, -0.8601500618464805, -0.8671379585748353, -0.8740714750594192, -0.8809510344906459, -0.8877770567656371, -0.894549958513847, -0.901270153122495, -0.9079380507617956, -0.9145540584099926, -0.9211185798782006, -0.9276320158350512, -0.9340947638311474, -0.9405072183233297, -0.9468697706987503, -0.9531828092987631, -0.9594467194426249, -0.9656618834510152, -0.9718286806693692, -0.9779474874910338, -0.9840186773802382, -0.9900426208948913, -0.9960196857091963, -1.0019502366360937, -1.0078346356495276, -1.0136732419065377, -1.0194664117691814, -1.0252144988262848, -1.0309178539150248, -1.0365768251423402, -1.0421917579061812, -1.0477629949165894, -1.053290876216616, -1.0587757392030757, -1.0642179186471403, -1.0696177467147725, -1.0749755529869984, -1.0802916644800242, -1.0855664056651957, -1.0908000984888033, -1.0959930623917304, -1.1011456143289524, -1.106258068788882, -1.1113307378125625, -1.1163639310127156, -1.1213579555926367, -1.1263131163649474, -1.1312297157701972, -1.1361080538953259, -1.1409484284919773, -1.145751134994674, -1.150516466538849, -1.1552447139787365, -1.1599361659051262, -1.1645911086629754, -1.1692098263688884, -1.1737926009284552, -1.17833971205346, -1.1828514372789527, -1.1873280519801874, -1.1917698293894323, -1.1961770406126444, -1.200549954646017, -1.2048888383923995, -1.2091939566775858, -1.2134655722664798, 
-1.2177039458791326, -1.2219093362066555, -1.2260819999270098, -1.2302221917206733, -1.234330164286184, -1.2384061683555643, -1.2424504527096256, -1.2464632641931506, -1.2504448477299603, -1.2543954463378648, -1.2583153011434935, -1.2622046513970142, -1.2660637344867343, -1.2698927859535913, -1.2736920395055287, -1.2774617270317596, -1.281202078616922, -1.2849133225551204, -1.28859568536386, -1.2922493917978743, -1.2958746648628399, -1.2994717258289905, -1.303040794244621, -1.3065820879494874, -1.3100958230881041, -1.3135822141229347, -1.3170414738474838, -1.3204738133992822, -1.3238794422727767, -1.327258568332115, -1.3306113978238332, -1.3339381353894437, -1.337238984077927, -1.3405141453581229, -1.3437638191310286, -1.3469882037419991, -1.3501874959928548, -1.3533618911538907, -1.3565115829757968, -1.359636763701484, -1.3627376240778157, -1.3658143533672527, -1.3688671393594027, -1.3718961683824833, -1.374901625314695, -1.3778836935955039, -1.3808425552368395, -1.3837783908342027, -1.3866913795776896, -1.3895816992629273, -1.3924495263019268, -1.39529503573385, -1.398118401235693, -1.4009197951328867, -1.403699388409815, -1.4064573507202511, -1.4091938503977108, -1.4119090544657305, -1.4146031286480565, -1.4172762373787642, -1.4199285438122922, -1.422560209833402, -1.4251713960670567, -1.4277622618882273, -1.4303329654316184, -1.43288366360132, -1.4354145120803856, -1.4379256653403332, -1.4404172766505734, -1.4428894980877665, -1.4453424805451005, -1.4477763737415055, -1.4501913262307875, -1.4525874854107, -1.4549649975319354, -1.4573240077070568, -1.4596646599193506, -1.461987097031617, -1.4642914607948891, -1.4665778918570846, -1.4688465297715905, -1.47109751300578, -1.4733309789494666, -1.4755470639232853, -1.4777459031870175, -1.4799276309478433, -1.482092380368535, -1.484240283575584, -1.4863714716672647, -1.4884860747216366, -1.4905842218044845, -1.4926660409771946, -1.4947316593045716, -1.4967812028625942, -1.4988147967461103, -1.5008325650764707, 
-1.502834631009108, -1.5048211167410503, -1.5067921435183818, -1.5087478316436411, -1.5106883004831662, -1.5126136684743778, -1.51452405313301, -1.5164195710602824, -1.5183003379500157, -1.520166468595695, -1.522018076897475, -1.523855275869133, -1.5256781776449653, -1.5274868934866321, -1.52928153378995, -1.5310622080916263, -1.5328290250759493, -1.5345820925814166, -1.536321517607323, -1.538047406320286, -1.5397598640607284, -1.5414589953493085, -1.5431449038932965, -1.5448176925929078, -1.546477463547581, -1.5481243180622113, -1.5497583566533322, -1.5513796790552519, -1.5529883842261398, -1.5545845703540668, -1.556168334862998, -1.5577397744187398, -1.5592989849348382, -1.560846061578435, -1.5623810987760756, -1.563904190219471, -1.5654154288712185, -1.5669149069704746, -1.5684027160385854, -1.5698789468846706, -1.5713436896111697, -1.5727970336193382, -1.5742390676147062, -1.5756698796124915, -1.5770895569429717, -1.578498186256816, -1.579895853530371, -1.581282644070912, -1.5826586425218463, -1.5840239328678813, -1.5853785984401512, -1.586722721921301, -1.5880563853505345, -1.5893796701286214, -1.5906926570228663, -1.5919954261720366, -1.593288057091255, -1.5945706286768537, -1.5958432192111875, -1.5971059063674138, -1.5983587672142323, -1.5996018782205896, -1.600835315260346, -1.6020591536169069, -1.6032734679878176, -1.6044783324893224, -1.6056738206608887, -1.6068600054696942, -1.6080369593150832, -1.6092047540329824, -1.6103634609002875, -1.611513150639213, -1.6126538934216086, -1.6137857588732423, -1.6149088160780507, -1.6160231335823536, -1.6171287793990405, -1.6182258210117193, -1.6193143253788378, -1.620394358937767, -1.6214659876088595, -1.6225292767994715, -1.6235842914079552, -1.6246310958276189, -1.62566975395066, -1.6267003291720603, -1.6277228843934608, -1.6287374820269958, -1.6297441839991058, -1.630743051754315, -1.6317341462589836, -1.6327175280050266, -1.6336932570136085, -1.6346613928388043, -1.6356219945712358, -1.636575120841678, 
-1.6375208298246384, -1.6384591792419052, -1.6393902263660736, -1.6403140280240387, -1.6412306406004666, -1.6421401200412329, -1.6430425218568405, -1.6439379011258048, -1.6448263124980176, -1.6457078101980815, -1.6465824480286202, -1.6474502793735615, -1.648311357201397, -1.6491657340684132, -1.6500134621219014, -1.650854593103339, -1.6516891783515477, -1.6525172688058276, -1.6533389150090665, -1.6541541671108233, -1.654963074870391, -1.6557656876598317, -1.656562054466992, -1.6573522238984915, -1.6581362441826901, -1.658914163172632, -1.6596860283489656, -1.6604518868228422, -1.661211785338791, -1.6619657702775732, -1.6627138876590106, -1.6634561831447972, -1.664192702041284, -1.6649234893022462, -1.6656485895316249, -1.6663680469862507, -1.6670819055785457, -1.667790208879202, -1.6684930001198421, -1.6691903221956574, -1.6698822176680264, -1.6705687287671118, -1.671249897394439, -1.6719257651254533, -1.672596373212057, -1.6732617625851276, -1.6739219738570155, -1.674577047324025, -1.6752270229688695, -1.6758719404631168, -1.6765118391696068, -1.6771467581448556, -1.6777767361414395, -1.678401811610359, -1.6790220227033874, -1.6796374072753986, -1.6802480028866762, -1.6808538468052092, -1.6814549760089634, -1.6820514271881404, -1.682643236747416, -1.6832304408081635, -1.6838130752106564, -1.684391175516257, -1.6849647770095881, -1.6855339147006838, -1.6860986233271293, -1.6866589373561784, -1.68721489098686, -1.6877665181520636, -1.6883138525206096, -1.6888569274993077, -1.689395776234992, -1.689930431616547, -1.6904609262769132, -1.6909872925950797, -1.6915095626980607, -1.6920277684628553, -1.6925419415183944, -1.6930521132474705, -1.693558314788653, -1.6940605770381891, -1.6945589306518898, -1.6950534060470006, -1.695544033404058, -1.6960308426687312, -1.6965138635536514, -1.6969931255402235, -1.6974686578804266, -1.6979404895985992, -1.6984086494932098, -1.698873166138617, -1.6993340678868099, -1.6997913828691429, -1.7002451389980486, -1.700695363968744, 
-1.7011420852609211, -1.701585330140422, -1.7020251256609051, -1.7024614986654947, -1.7028944757884208, -1.7033240834566445, -1.7037503478914702, -1.7041732951101467, -1.7045929509274547, -1.705009340957283, -1.7054224906141915, -1.705832425114962, -1.7062391694801382, -1.7066427485355524, -1.7070431869138405, -1.707440509055946, -1.7078347392126108, -1.7082259014458567, -1.7086140196304533, -1.7089991174553754, -1.7093812184252475, -1.7097603458617814, -1.7101365229051972, -1.7105097725156357, -1.7108801174745618, -1.7112475803861524, -1.7116121836786782, -1.7119739496058706, -1.7123329002482826, -1.7126890575146336, -1.7130424431431488, -1.7133930787028848, -1.713740985595047, -1.7140861850542948, -1.714428698150038, -1.7147685457877238, -1.7151057487101107, -1.7154403274985366, -1.7157723025741742, -1.7161016941992775, -1.7164285224784175, -1.716752807359711, -1.717074568636037, -1.717393825946245, -1.7177105987763543, -1.718024906460742, -1.7183367681833244, -1.7186462029787277, -1.7189532297334487, -1.7192578671870087, -1.7195601339330968, -1.719860048420705, -1.7201576289552538, -1.7204528936997103, -1.720745860675696, -1.7210365477645868, -1.7213249727086049, -1.7216111531119014, -1.7218951064416308, -1.7221768500290169, -1.722456401070411, -1.722733776628342, -1.7230089936325557, -1.7232820688810508, -1.723553019041103, -1.7238218606502809, -1.724088610117458, -1.7243532837238122, -1.7246158976238213, -1.7248764678462465, -1.7251350102951135, -1.725391540750681, -1.725646074870405, -1.7258986281898945, -1.7261492161238592, -1.7263978539670506, -1.7266445568951954, -1.7268893399659218, -1.7271322181196793, -1.7273732061806486, -1.7276123188576493, -1.7278495707450348, -1.7280849763235844, -1.7283185499613882, -1.728550305914722, -1.728780258328918, -1.7290084212392296, -1.7292348085716867, -1.729459434143945, -1.7296823116661322, -1.729903454741681, -1.7301228768681627, -1.730340591438109, -1.7305566117398306, -1.7307709509582265, -1.730983622175592, 
-1.7311946383724137, -1.731404012428164, -1.7316117571220855, -1.7318178851339725, -1.7320224090449448, -1.7322253413382147, -1.7324266943998492, -1.732626480519527, -1.7328247118912867, -1.7330214006142721, -1.7332165586934722, -1.7334101980404508, -1.733602330474076, -1.7337929677212405, -1.7339821214175777, -1.7341698031081716, -1.734356024248262, -1.7345407962039427, -1.7347241302528569, -1.7349060375848833, -1.7350865293028206, -1.735265616423066, -1.7354433098762845, -1.7356196205080796, -1.7357945590796522, -1.7359681362684605, -1.7361403626688687, -1.7363112487927956, -1.736480805070356, -1.736649041850496, -1.736815969401626, -1.7369815979122472, -1.7371459374915725, -1.7373089981701453, -1.7374707899004505, -1.737631322557521, -1.7377906059395432, -1.7379486497684522, -1.738105463690527, -1.738261057276979, -1.7384154400245344, -1.738568621356017, -1.7387206106209205, -1.738871417095981, -1.7390210499857417, -1.739169518423116, -1.7393168314699443, -1.739462998117547, -1.7396080272872738, -1.7397519278310478, -1.7398947085319056, -1.7400363781045343, -1.740176945195802, -1.7403164183852868, -1.7404548061858, -1.7405921170439056, -1.7407283593404361, -1.7408635413910034, -1.740997671446507, -1.7411307576936372, -1.741262808255376, -1.7413938311914903, -1.7415238344990265, -1.741652826112797, -1.7417808139058655, -1.741907805690026, -1.742033809216282, -1.7421588321753172, -1.7422828821979663, -1.7424059668556802, -1.7425280936609888, -1.7426492700679581, -1.7427695034726474, -1.742888801213558, -1.7430071705720833, -1.7431246187729525, -1.7432411529846714, -1.7433567803199599, -1.7434715078361862, -1.743585342535798, -1.7436982913667498, -1.7438103612229252, -1.743921558944561, -1.7440318913186612, -1.7441413650794146, -1.744249986908603, -1.7443577634360103, -1.7444647012398273, -1.7445708068470525, -1.7446760867338913, -1.744780547326151, -1.7448841949996323, -1.744987036080519, -1.7450890768457656, -1.7451903235234767, -1.7452907822932908, 
-1.745390459286756, -1.7454893605877038, -1.7455874922326218, -1.7456848602110202, -1.7457814704657992, -1.7458773288936111, -1.7459724413452193, -1.746066813625857, -1.7461604514955802, -1.74625336066962, -1.7463455468187312, -1.7464370155695383, -1.7465277725048798, -1.746617823164147, -1.7467071730436239, -1.7467958275968223, -1.7468837922348144, -1.7469710723265626, -1.7470576731992482, -1.7471436001385956, -1.7472288583891962, -1.747313453154827, -1.7473973895987684, -1.7474806728441208, -1.747563307974116, -1.747645300032428, -1.7477266540234797, -1.7478073749127503, -1.7478874676270773, -1.7479669370549562, -1.7480457880468407, -1.7481240254154384, -1.7482016539360037, -1.7482786783466284, -1.7483551033485347, -1.7484309336063575, -1.7485061737484322, -1.748580828367076, -1.7486549020188682, -1.7487283992249292, -1.7488013244711949, -1.7488736822086914, -1.7489454768538075, -1.749016712788563, -1.7490873943608758, -1.749157525884829, -1.7492271116409333, -1.749296155876387, -1.7493646628053376, -1.7494326366091377, -1.7495000814366, -1.749567001404251, -1.7496334005965817, -1.7496992830662985, -1.7497646528345683, -1.7498295138912656, -1.7498938701952151, -1.749957725674434, -1.750021084226371, -1.7500839497181457, -1.7501463259867815, -1.7502082168394437, -1.7502696260536696, -1.7503305573775991, -1.7503910145302035, -1.750451001201514, -1.7505105210528447, -1.7505695777170178, -1.750628174798584, -1.7506863158740438, -1.7507440044920646, -1.7508012441736982, -1.7508580384125954, -1.7509143906752191, -1.750970304401056, -1.7510257830028262, -1.7510808298666922, -1.751135448352465, -1.7511896417938095, -1.7512434134984474, -1.7512967667483603, -1.7513497047999877, -1.7514022308844297, -1.751454348207639, -1.7515060599506218, -1.751557369269628, -1.7516082792963457, -1.7516587931380925, -1.751708913878004, -1.7517586445752231, -1.751807988265086, -1.7518569479593078, -1.7519055266461667, -1.7519537272906853, -1.752001552834812, -1.7520490061976015, 
-1.7520960902753921, -1.7521428079419827, -1.7521891620488075, -1.7522351554251108, -1.7522807908781204, -1.752326071193217, -1.752370999134106, -1.752415577442985, -1.752459808840712, -1.7525036960269713, -1.7525472416804375, -1.7525904484589399, -1.7526333189996244, -1.7526758559191145, -1.7527180618136708, -1.75275993925935, -1.7528014908121614, -1.7528427190082232, -1.7528836263639183, -1.7529242153760454, -1.7529644885219744, -1.753004448259795, -1.7530440970284693, -1.7530834372479773, -1.7531224713194686, -1.7531612016254055, -1.7531996305297102, -1.7532377603779092, -1.7532755934972755, -1.7533131321969708, -1.753350378768188, -1.753387335484288, -1.7534240046009413, -1.7534603883562652, -1.7534964889709588, -1.75353230864844, -1.7535678495749798, -1.7536031139198347, -1.7536381038353808, -1.753672821457244, -1.75370726890443, -1.7537414482794556, -1.7537753616684741, -1.753809011141406, -1.753842398752062, -1.7538755265382702, -1.7539083965220008, -1.7539410107094888, -1.753973371091356, -1.7540054796427338, -1.7540373383233825, -1.754068949077811, -1.7541003138353972, -1.7541314345105035, -1.7541623130025947, -1.7541929511963539, -1.7542233509617977, -1.75425351415439, -1.754283442615155, -1.7543131381707908, -1.7543426026337794, -1.7543718378024977, -1.754400845461328, -1.7544296273807662, -1.7544581853175307, -1.754486521014668, -1.7545146362016608, -1.7545425325945327, -1.7545702118959527, -1.7545976757953403, -1.7546249259689672, -1.7546519640800604, -1.754678791778904, -1.7547054107029387, -1.754731822476863, -1.7547580287127322, -1.7547840310100558, -1.7548098309558962, -1.7548354301249651, -1.7548608300797202, -1.754886032370459, -1.7549110385354152, -1.7549358501008518, -1.7549604685811544, -1.7549848954789236, -1.7550091322850658, -1.7550331804788866, -1.7550570415281779, -1.7550807168893108, -1.7551042080073216, -1.7551275163160018, -1.7551506432379846, -1.7551735901848329, -1.7551963585571237, -1.7552189497445359, -1.7552413651259329, 
-1.755263606069448, -1.7552856739325675, -1.7553075700622136, -1.755329295794827, -1.7553508524564478, -1.7553722413627968, -1.7553934638193562, -1.7554145211214487, -1.7554354145543158, -1.7554561453931985, -1.7554767149034134, -1.7554971243404296, -1.755517374949947, -1.7555374679679705, -1.7555574046208875, -1.7555771861255407, -1.7555968136893032, -1.7556162885101527, -1.755635611776744, -1.7556547846684818, -1.755673808355592, -1.755692683999195, -1.755711412751374, -1.7557299957552472, -1.755748434145037, -1.7557667290461392, -1.755784881575191, -1.7558028928401412, -1.7558207639403158, -1.7558384959664852, -1.7558560900009323, -1.7558735471175173, -1.755890868381743, -1.755908054850821, -1.7559251075737359, -1.7559420275913078, -1.7559588159362585, -1.7559754736332724, -1.7559920016990596, -1.7560084011424184, -1.7560246729642965, -1.7560408181578522, -1.7560568377085146, -1.7560727325940448, -1.7560885037845946, -1.7561041522427658, -1.7561196789236693, -1.7561350847749835, -1.7561503707370114, -1.7561655377427396, -1.7561805867178921, -1.7561955185809914, -1.7562103342434097, -1.7562250346094284, -1.7562396205762916, -1.7562540930342598, -1.7562684528666668, -1.7562827009499709, -1.7562968381538115, -1.7563108653410588, -1.7563247833678692, -1.7563385930837356, -1.7563522953315402, -1.7563658909476068, -1.7563793807617494, -1.7563927655973257, -1.7564060462712852, -1.75641922359422, -1.7564322983704141, -1.7564452713978926, -1.7564581434684703, -1.7564709153678, -1.7564835878754208, -1.7564961617648047, -1.7565086378034052, -1.7565210167527028, -1.756533299368253, -1.7565454863997307, -1.7565575785909768, -1.756569576680044, -1.7565814813992415, -1.7565932934751791, -1.7566050136288127, -1.756616642575487, -1.7566281810249802, -1.7566396296815472, -1.756650989243962, -1.7566622604055604, -1.7566734438542837, -1.7566845402727185, -1.7566955503381398, -1.7567064747225525, -1.7567173140927312, -1.7567280691102618, -1.7567387404315822, -1.756749328708021, 
-1.7567598345858384, -1.7567702587062666, -1.756780601705546, -1.7567908642149666, -1.756801046860905, -1.756811150264864, -1.7568211750435097, -1.7568311218087092, -1.7568409911675675, -1.7568507837224658, -1.7568605000710975, -1.7568701408065042, -1.756879706517113, -1.7568891977867709, -1.7568986151947823, -1.7569079593159436, -1.7569172307205767, -1.7569264299745666, -1.7569355576393941, -1.7569446142721705, -1.756953600425671, -1.7569625166483704, -1.7569713634844746, -1.7569801414739536, -1.7569888511525766, -1.756997493051943, -1.757006067699515, -1.75701457561865, -1.757023017328633, -1.757031393344708, -1.7570397041781085, -1.7570479503360905, -1.7570561323219618, -1.757064250635114, -1.7570723057710522, -1.7570802982214249, -1.7570882284740554, -1.7570960970129708, -1.75710390431843, -1.757111650866956, -1.7571193371313636, -1.757126963580787, -1.7571345306807096, -1.757142038892994, -1.7571494886759067, -1.7571568804841484, -1.7571642147688824, -1.7571714919777597, -1.757178712554948, -1.7571858769411592, -1.7571929855736745, -1.7572000388863727, -1.757207037309756, -1.7572139812709762, -1.7572208711938615, -1.757227707498942, -1.7572344906034738, -1.7572412209214674, -1.757247898863711, -1.7572545248377955, -1.757261099248141, -1.757267622496019, -1.7572740949795795, -1.7572805170938732, -1.7572868892308768, -1.7572932117795168, -1.7572994851256933, -1.7573057096523024, -1.757311885739261, -1.7573180137635298, -1.757324094099135, -1.7573301271171933, -1.7573361131859329, -1.7573420526707155, -1.757347945934061, -1.7573537933356675, -1.7573595952324335, -1.7573653519784809, -1.757371063925175, -1.7573767314211473, -1.7573823548123158, -1.7573879344419072, -1.7573934706504766, -1.757398963775929, -1.7574044141535403, -1.7574098221159762, -1.7574151879933146, -1.7574205121130646, -1.7574257948001855, -1.7574310363771095, -1.757436237163758, -1.7574413974775638, -1.757446517633489, -1.7574515979440446, -1.7574566387193096, -1.7574616402669505, 
-1.757466602892239, -1.757471526898071, -1.7574764125849855, -1.7574812602511827, -1.7574860701925428, -1.7574908427026417, -1.7574955780727728, -1.7575002765919605, -1.7575049385469825}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb new file mode 100644 index 0000000000..7a063bfd6a --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/sigmoid_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t sigmoid_table[1024] = {0.5009765612582384, 0.5029296539728477, 0.5048826572854918, 0.5068355116069857, 0.5087881573663283, 0.5107405350179702, 0.5126925850490733, 0.5146442479867636, 0.5165954644053721, 0.5185461749336651, 0.5204963202620584, 0.5224458411498177, 0.5243946784322399, 0.5263427730278142, 0.5282900659453637, 0.5302364982911614, 0.5321820112760233, 0.5341265462223723, 0.5360700445712753, 0.5380124478894484, 0.5399536978762308, 0.541893736370524, 0.5438325053576957, 0.5457699469764457, 0.5477060035256329, 0.5496406174710619, 0.5515737314522255, 0.5535052882890054, 0.5554352309883238, 0.5573635027507514, 0.5592900469770627, 0.5612148072747433, 0.5631377274644433, 0.5650587515863784, 0.5669778239066745, 0.5688948889236563, 0.5708098913740778, 0.5727227762392935, 0.5746334887513667, 0.5765419743991188, 0.5784481789341114, 0.5803520483765656, 0.5822535290212146, 0.5841525674430875, 0.5860491105032255, 0.5879431053543276, 0.5898344994463247, 0.591723240531882, 0.593609276671827, 0.5954925562405032, 0.5973730279310485, 0.5992506407605962, 0.6011253440753982, 0.60299708755587, 0.6048658212215559, 0.6067314954360127, 0.6085940609116138, 0.6104534687142686, 0.6123096702680609, 0.6141626173598018, 0.6160122621434994, 0.6178585571447408, 0.6197014552649915, 0.6215409097858049, 0.6233768743729451, 0.625209303080424, 0.6270381503544469, 0.628863371037271, 0.6306849203709737, 
0.6325027540011314, 0.6343168279804077, 0.6361270987720502, 0.6379335232532979, 0.6397360587186955, 0.6415346628833171, 0.6433292938858967, 0.6451199102918682, 0.6469064710963125, 0.6486889357268106, 0.6504672640462058, 0.652241416355273, 0.6540113533952943, 0.6557770363505423, 0.6575384268506705, 0.6592954869730118, 0.6610481792447818, 0.6627964666451921, 0.6645403126074702, 0.6662796810207858, 0.6680145362320873, 0.6697448430478439, 0.6714705667356973, 0.6731916730260219, 0.6749081281133934, 0.6766198986579663, 0.6783269517867622, 0.6800292550948656, 0.6817267766465328, 0.6834194849762091, 0.6851073490894588, 0.6867903384638052, 0.6884684230494847, 0.6901415732701115, 0.6918097600232571, 0.6934729546809425, 0.6951311290900456, 0.6967842555726225, 0.6984323069261458, 0.7000752564236576, 0.7017130778138413, 0.7033457453210089, 0.7049732336450086, 0.7065955179610504, 0.7082125739194518, 0.7098243776453041, 0.711430905738061, 0.7130321352710473, 0.7146280437908932, 0.7162186093168913, 0.7178038103402775, 0.7193836258234395, 0.72095803519905, 0.7225270183691288, 0.7240905557040317, 0.7256486280413706, 0.7272012166848617, 0.7287483034031065, 0.7302898704283038, 0.7318259004548958, 0.7333563766381476, 0.7348812825926613, 0.7364006023908275, 0.7379143205612128, 0.7394224220868858, 0.740924892403682, 0.7424217173984088, 0.743912883406992, 0.7453983772125649, 0.7468781860434985, 0.7483522975713798, 0.7498206999089312, 0.7512833816078802, 0.7527403316567737, 0.7541915394787425, 0.7556369949292148, 0.7570766882935809, 0.758510610284809, 0.7599387520410151, 0.7613611051229845, 0.7627776615116516, 0.7641884136055329, 0.7655933542181188, 0.7669924765752232, 0.7683857743122923, 0.7697732414716741, 0.7711548724998493, 0.7725306622446245, 0.7739006059522892, 0.775264699264738, 0.776622938216557, 0.7779753192320792, 0.7793218391224057, 0.780662495082397, 0.7819972846876342, 0.7833262058913498, 0.7846492570213327, 0.7859664367768047, 0.7872777442252712, 0.7885831787993471, 
0.7898827402935596, 0.7911764288611264, 0.792464245010713, 0.7937461896031696, 0.7950222638482464, 0.7962924693012908, 0.7975568078599278, 0.79881528176072, 0.800067893575815, 0.8013146462095748, 0.8025555428951934, 0.8037905871912979, 0.8050197829785412, 0.8062431344561782, 0.8074606461386356, 0.8086723228520697, 0.8098781697309154, 0.8110781922144276, 0.8122723960432147, 0.8134607872557665, 0.8146433721849758, 0.8158201574546557, 0.8169911499760516, 0.8181563569443524, 0.819315785835197, 0.8204694444011801, 0.8216173406683573, 0.8227594829327498, 0.823895879756849, 0.825026539966124, 0.8261514726455292, 0.8272706871360163, 0.8283841930310485, 0.8294920001731195, 0.8305941186502768, 0.8316905587926504, 0.8327813311689884, 0.8338664465831974, 0.8349459160708924, 0.8360197508959527, 0.8370879625470882, 0.8381505627344117, 0.8392075633860248, 0.8402589766446108, 0.8413048148640399, 0.8423450906059847, 0.843379816636549, 0.8444090059229068, 0.8454326716299558, 0.8464508271169839, 0.8474634859343486, 0.8484706618201715, 0.8494723686970466, 0.8504686206687655, 0.8514594320170558, 0.8524448171983366, 0.8534247908404916, 0.8543993677396563, 0.8553685628570248, 0.8563323913156724, 0.8572908683973974, 0.8582440095395809, 0.8591918303320635, 0.8601343465140441, 0.8610715739709943, 0.8620035287315965, 0.862930226964697, 0.863851684976284, 0.8647679192064821, 0.865678946226571, 0.8665847827360217, 0.8674854455595564, 0.8683809516442299, 0.8692713180565296, 0.870156561979502, 0.8710367007098975, 0.8719117516553389, 0.8727817323315139, 0.8736466603593868, 0.8745065534624367, 0.8753614294639163, 0.8762113062841353, 0.8770562019377661, 0.8778961345311737, 0.8787311222597683, 0.8795611834053839, 0.8803863363336775, 0.8812065994915543, 0.8820219914046175, 0.8828325306746402, 0.8836382359770631, 0.8844391260585168, 0.8852352197343663, 0.8860265358862826, 0.8868130934598374, 0.8875949114621223, 0.8883720089593933, 0.8891444050747391, 0.8899121189857752, 0.8906751699223611, 
0.8914335771643437, 0.8921873600393251, 0.8929365379204537, 0.8936811302242426, 0.8944211564084097, 0.8951566359697456, 0.8958875884420037, 0.8966140333938164, 0.8973359904266346, 0.8980534791726937, 0.8987665192930021, 0.8994751304753559, 0.9001793324323768, 0.9008791448995754, 0.9015745876334377, 0.9022656804095371, 0.9029524430206697, 0.9036348952750146, 0.904313056994317, 0.9049869480120961, 0.9056565881718779, 0.9063219973254493, 0.9069831953311379, 0.9076402020521152, 0.9082930373547222, 0.9089417211068199, 0.9095862731761619, 0.910226713428791, 0.9108630617274585, 0.9114953379300658, 0.9121235618881309, 0.9127477534452755, 0.9133679324357356, 0.9139841186828943, 0.9145963319978375, 0.9152045921779318, 0.9158089190054228, 0.9164093322460585, 0.9170058516477309, 0.9175984969391425, 0.9181872878284922, 0.9187722440021829, 0.9193533851235517, 0.9199307308316192, 0.9205043007398609, 0.9210741144349998, 0.9216401914758181, 0.922202551391991, 0.9227612136829397, 0.9233161978167049, 0.9238675232288402, 0.9244152093213263, 0.9249592754615029, 0.9254997409810218, 0.9260366251748193, 0.9265699473001059, 0.927099726575378, 0.9276259821794456, 0.9281487332504805, 0.9286679988850831, 0.929183798137365, 0.929696150018054, 0.9302050734936125, 0.9307105874853785, 0.9312127108687195, 0.9317114624722079, 0.9322068610768112, 0.9326989254150999, 0.933187674170474, 0.9336731259764038, 0.9341552994156889, 0.9346342130197337, 0.935109885267838, 0.935582334586506, 0.9360515793487681, 0.9365176378735215, 0.9369805284248849, 0.9374402692115686, 0.9378968783862601, 0.9383503740450254, 0.9388007742267246, 0.9392480969124417, 0.9396923600249307, 0.9401335814280749, 0.9405717789263597, 0.9410069702643621, 0.941439173126251, 0.9418684051353043, 0.9422946838534367, 0.9427180267807429, 0.9431384513550535, 0.9435559749515046, 0.9439706148821183, 0.9443823883953985, 0.9447913126759381, 0.9451974048440384, 0.9456006819553416, 0.9460011610004747, 0.9463988589047058, 0.9467937925276119, 
0.947185978662759, 0.9475754340373931, 0.9479621753121426, 0.9483462190807312, 0.9487275818697054, 0.9491062801381662, 0.949482330277519, 0.9498557486112288, 0.9502265513945876, 0.9505947548144925, 0.9509603749892346, 0.9513234279682939, 0.9516839297321508, 0.9520418961921002, 0.9523973431900807, 0.9527502864985101, 0.9531007418201309, 0.9534487247878658, 0.9537942509646805, 0.954137335843458, 0.954477994846878, 0.9548162433273092, 0.9551520965667056, 0.9554855697765151, 0.955816678097593, 0.9561454366001265, 0.9564718602835648, 0.9567959640765575, 0.9571177628369031, 0.9574372713515003, 0.9577545043363116, 0.9580694764363313, 0.9583822022255619, 0.9586926962069957, 0.959000972812607, 0.959307046403347, 0.9596109312691483, 0.9599126416289347, 0.9602121916306376, 0.9605095953512194, 0.9608048667967017, 0.9610980199022023, 0.961389068531974, 0.9616780264794544, 0.9619649074673171, 0.9622497251475317, 0.9625324931014263, 0.9628132248397597, 0.9630919338027948, 0.9633686333603795, 0.9636433368120323, 0.9639160573870332, 0.9641868082445192, 0.9644556024735851, 0.9647224530933884, 0.9649873730532592, 0.965250375232816, 0.9655114724420827, 0.9657706774216144, 0.9660280028426234, 0.9662834613071121, 0.9665370653480092, 0.9667888274293097, 0.9670387599462187, 0.9672868752253001, 0.9675331855246277, 0.9677777030339422, 0.9680204398748077, 0.9682614081007771, 0.9685006196975565, 0.9687380865831757, 0.9689738206081602, 0.9692078335557077, 0.9694401371418673, 0.969670743015721, 0.9698996627595705, 0.9701269078891237, 0.9703524898536865, 0.9705764200363561, 0.9707987097542187, 0.9710193702585468, 0.9712384127350021, 0.9714558483038396, 0.9716716880201136, 0.9718859428738872, 0.9720986237904434, 0.9723097416304984, 0.972519307190418, 0.9727273312024342, 0.9729338243348664, 0.9731387971923422, 0.9733422603160213, 0.9735442241838218, 0.9737446992106472, 0.9739436957486151, 0.9741412240872889, 0.9743372944539102, 0.9745319170136325, 0.9747251018697572, 0.9749168590639715, 
0.9751071985765847, 0.9752961303267709, 0.9754836641728087, 0.9756698099123244, 0.975854577282535, 0.9760379759604947, 0.9762200155633391, 0.9764007056485348, 0.9765800557141248, 0.9767580751989804, 0.9769347734830501, 0.977110159887611, 0.9772842436755209, 0.9774570340514706, 0.9776285401622385, 0.9777987710969442, 0.977967735887304, 0.9781354435078863, 0.9783019028763682, 0.9784671228537921, 0.9786311122448239, 0.9787938797980108, 0.9789554342060389, 0.9791157841059936, 0.9792749380796175, 0.9794329046535715, 0.979589692299694, 0.979745309435261, 0.9798997644232479, 0.9800530655725895, 0.9802052211384412, 0.9803562393224404, 0.9805061282729682, 0.9806548960854106, 0.9808025508024198, 0.980949100414177, 0.9810945528586532, 0.9812389160218716, 0.9813821977381683, 0.9815244057904553, 0.9816655479104817, 0.9818056317790944, 0.9819446650265006, 0.9820826552325287, 0.9822196099268891, 0.9823555365894354, 0.9824904426504252, 0.9826243354907803, 0.9827572224423471, 0.982889110788156, 0.9830200077626816, 0.9831499205521023, 0.9832788562945579, 0.9834068220804095, 0.9835338249524974, 0.9836598719063984, 0.9837849698906836, 0.9839091258071755, 0.9840323465112041, 0.9841546388118627, 0.9842760094722643, 0.9843964652097964, 0.9845160126963748, 0.9846346585586978, 0.9847524093785005, 0.9848692716928064, 0.9849852519941804, 0.9851003567309808, 0.9852145923076094, 0.9853279650847626, 0.9854404813796805, 0.9855521474663975, 0.9856629695759892, 0.985772953896821, 0.985882106574795, 0.985990433713597, 0.9860979413749407, 0.9862046355788149, 0.9863105223037255, 0.986415607486941, 0.9865198970247337, 0.9866233967726227, 0.9867261125456144, 0.9868280501184434, 0.9869292152258129, 0.9870296135626316, 0.9871292507842533, 0.9872281325067137, 0.9873262643069669, 0.9874236517231209, 0.9875203002546722, 0.9876162153627396, 0.987711402470298, 0.9878058669624095, 0.9878996141864553, 0.9879926494523661, 0.9880849780328519, 0.9881766051636294, 0.9882675360436518, 0.9883577758353339, 
0.9884473296647792, 0.9885362026220039, 0.9886243997611625, 0.9887119261007691, 0.9887987866239217, 0.9888849862785224, 0.9889705299774982, 0.9890554225990201, 0.9891396689867223, 0.9892232739499199, 0.9893062422638245, 0.9893885786697612, 0.9894702878753826, 0.9895513745548833, 0.9896318433492118, 0.9897116988662837, 0.9897909456811917, 0.9898695883364156, 0.9899476313420319, 0.9900250791759213, 0.9901019362839765, 0.9901782070803069, 0.9902538959474461, 0.9903290072365529, 0.9904035452676181, 0.9904775143296634, 0.9905509186809447, 0.9906237625491514, 0.9906960501316063, 0.9907677855954636, 0.9908389730779061, 0.9909096166863416, 0.9909797204985978, 0.9910492885631179, 0.991118324899153, 0.991186833496954, 0.9912548183179649, 0.9913222832950107, 0.9913892323324893, 0.9914556693065582, 0.9915215980653225, 0.9915870224290223, 0.9916519461902167, 0.9917163731139695, 0.9917803069380321, 0.9918437513730266, 0.9919067101026269, 0.9919691867837398, 0.9920311850466846, 0.9920927084953715, 0.9921537607074791, 0.9922143452346321, 0.9922744656025764, 0.9923341253113541, 0.9923933278354767, 0.9924520766240994, 0.9925103751011916, 0.9925682266657089, 0.9926256346917622, 0.9926826025287876, 0.9927391335017144, 0.9927952309111313, 0.9928508980334536, 0.9929061381210885, 0.9929609544025984, 0.9930153500828657, 0.9930693283432538, 0.9931228923417693, 0.9931760452132232, 0.9932287900693889, 0.9932811299991624, 0.9933330680687192, 0.993384607321672, 0.9934357507792255, 0.9934865014403319, 0.9935368622818452, 0.9935868362586746, 0.993636426303936, 0.9936856353291037, 0.993734466224161, 0.9937829218577495, 0.9938310050773187, 0.9938787187092715, 0.9939260655591138, 0.9939730484115986, 0.9940196700308718, 0.9940659331606164, 0.9941118405241954, 0.9941573948247951, 0.9942025987455659, 0.9942474549497627, 0.9942919660808865, 0.9943361347628211, 0.9943799635999729, 0.9944234551774075, 0.994466612060986, 0.9945094367975008, 0.9945519319148104, 0.9945940999219731, 0.99463594330938, 
0.9946774645488873, 0.9947186660939477, 0.994759550379741, 0.9948001198233035, 0.9948403768236574, 0.9948803237619381, 0.9949199630015225, 0.9949592968881543, 0.994998327750071, 0.9950370578981272, 0.99507548962592, 0.9951136252099114, 0.9951514669095514, 0.9951890169673988, 0.9952262776092429, 0.9952632510442234, 0.9952999394649492, 0.995336345047618, 0.9953724699521328, 0.9954083163222202, 0.9954438862855455, 0.9954791819538289, 0.9955142054229602, 0.9955489587731127, 0.9955834440688559, 0.9956176633592693, 0.9956516186780524, 0.9956853120436374, 0.9957187454592983, 0.9957519209132613, 0.995784840378813, 0.9958175058144085, 0.9958499191637799, 0.9958820823560408, 0.9959139973057952, 0.9959456659132401, 0.9959770900642708, 0.9960082716305863, 0.9960392124697902, 0.9960699144254949, 0.9961003793274221, 0.9961306089915051, 0.9961606052199882, 0.9961903698015271, 0.9962199045112877, 0.9962492111110447, 0.9962782913492785, 0.9963071469612743, 0.9963357796692157, 0.9963641911822829, 0.9963923831967464, 0.9964203573960624, 0.9964481154509656, 0.9964756590195629, 0.9965029897474263, 0.9965301092676847, 0.9965570192011142, 0.9965837211562298, 0.9966102167293753, 0.9966365075048124, 0.9966625950548095, 0.9966884809397303, 0.9967141667081211, 0.9967396538967973, 0.996764944030931, 0.9967900386241352, 0.9968149391785505, 0.9968396471849287, 0.9968641641227171, 0.9968884914601416, 0.9969126306542909, 0.9969365831511966, 0.9969603503859165, 0.9969839337826155, 0.9970073347546454, 0.9970305547046259, 0.9970535950245228, 0.9970764570957289, 0.99709914228914, 0.9971216519652336, 0.9971439874741471, 0.9971661501557534, 0.9971881413397367, 0.9972099623456693, 0.9972316144830862, 0.9972530990515591, 0.9972744173407714, 0.9972955706305906, 0.9973165601911425, 0.9973373872828826, 0.9973580531566689, 0.9973785590538324, 0.9973989062062492, 0.9974190958364102, 0.9974391291574908, 0.997459007373421, 0.9974787316789544, 0.9974983032597357, 0.9975177232923699, 0.9975369929444886, 
0.997556113374818, 0.9975750857332447, 0.9975939111608827, 0.9976125907901375, 0.9976311257447729, 0.9976495171399739, 0.9976677660824127, 0.9976858736703106, 0.9977038409935028, 0.9977216691335006, 0.9977393591635534, 0.9977569121487109, 0.9977743291458849, 0.9977916112039098, 0.9978087593636037, 0.9978257746578281, 0.9978426581115474, 0.9978594107418892, 0.9978760335582023, 0.997892527562115, 0.9979088937475938, 0.9979251331010007, 0.9979412466011499, 0.9979572352193652, 0.9979730999195362, 0.9979888416581738, 0.9980044613844665, 0.9980199600403347, 0.9980353385604862, 0.9980505978724697, 0.9980657388967297, 0.998080762546659, 0.9980956697286526, 0.9981104613421601, 0.9981251382797387, 0.9981397014271043, 0.9981541516631836, 0.9981684898601648, 0.9981827168835503, 0.998196833592204, 0.9982108408384048, 0.998224739467894, 0.998238530319926, 0.9982522142273162, 0.998265792016491, 0.9982792645075347, 0.9982926325142392, 0.9983058968441496, 0.998319058298613, 0.9983321176728251, 0.9983450757558759, 0.9983579333307973, 0.9983706911746074, 0.9983833500583577, 0.9983959107471775, 0.998408374000318, 0.9984207405711987, 0.99843301120745, 0.9984451866509576, 0.9984572676379068, 0.9984692548988241, 0.9984811491586221, 0.9984929511366406, 0.9985046615466895, 0.998516281097091, 0.9985278104907205, 0.998539250425049, 0.9985506015921835, 0.9985618646789075, 0.9985730403667223, 0.9985841293318867, 0.9985951322454563, 0.9986060497733246, 0.9986168825762602, 0.9986276313099476, 0.9986382966250248, 0.9986488791671222, 0.9986593795769002, 0.9986697984900882, 0.9986801365375205, 0.9986903943451748, 0.9987005725342086, 0.9987106717209965, 0.9987206925171654, 0.9987306355296329, 0.9987405013606405, 0.9987502906077915, 0.9987600038640845, 0.9987696417179502, 0.9987792047532855, 0.9987886935494876, 0.998798108681489, 0.9988074507197912, 0.9988167202304989, 0.9988259177753532, 0.9988350439117649, 0.9988440991928482, 0.9988530841674523, 0.9988619993801953, 0.9988708453714951, 
0.9988796226776031, 0.9988883318306349, 0.998896973358602, 0.9989055477854435, 0.9989140556310573, 0.9989224974113308, 0.998930873638171, 0.9989391848195363, 0.9989474314594652, 0.9989556140581076, 0.9989637331117535, 0.9989717891128628, 0.9989797825500956, 0.9989877139083393, 0.9989955836687396, 0.9990033923087277, 0.9990111403020493, 0.999018828118793, 0.9990264562254172, 0.9990340250847796, 0.9990415351561638, 0.9990489868953062, 0.9990563807544247, 0.9990637171822441, 0.9990709966240232, 0.9990782195215828, 0.99908538631333, 0.9990924974342854, 0.9990995533161093, 0.9991065543871269, 0.9991135010723542, 0.9991203937935235, 0.9991272329691079, 0.9991340190143472, 0.999140752341272, 0.999147433358729, 0.9991540624724046, 0.9991606400848502, 0.999167166595505, 0.9991736424007212, 0.9991800678937864, 0.9991864434649486, 0.9991927695014381, 0.9991990463874916, 0.9992052745043747, 0.9992114542304054, 0.9992175859409759, 0.9992236700085755, 0.9992297068028129, 0.9992356966904383, 0.9992416400353659, 0.9992475371986941, 0.999253388538729, 0.999259194411005, 0.9992649551683059, 0.999270671160687, 0.9992763427354946, 0.9992819702373881, 0.9992875540083602, 0.9992930943877577, 0.9992985917123008, 0.999304046316105, 0.9993094585307, 0.9993148286850494, 0.9993201571055714, 0.9993254441161578, 0.9993306900381931, 0.9993358951905751, 0.9993410598897322, 0.9993461844496442, 0.9993512691818597, 0.9993563143955159, 0.9993613203973564, 0.9993662874917502, 0.9993712159807097, 0.9993761061639089, 0.9993809583387016, 0.9993857728001386, 0.9993905498409864, 0.9993952897517442, 0.9993999928206616, 0.9994046593337557, 0.9994092895748281, 0.9994138838254832, 0.9994184423651439, 0.9994229654710683, 0.9994274534183676, 0.9994319064800213, 0.9994363249268947, 0.9994407090277545, 0.9994450590492857, 0.9994493752561064, 0.9994536579107853, 0.9994579072738564, 0.999462123603835, 0.9994663071572335, 0.9994704581885768, 0.9994745769504169, 0.9994786636933496, 0.9994827186660278, 
0.9994867421151781, 0.9994907342856149, 0.9994946954202548, 0.999498625760132, 0.9995025255444128, 0.9995063950104092, 0.9995102343935939, 0.9995140439276147, 0.9995178238443079, 0.9995215743737126, 0.9995252957440849, 0.9995289881819112, 0.999532651911922, 0.9995362871571053, 0.999539894138721, 0.9995434730763126, 0.999547024187722, 0.999550547689102, 0.9995540437949288, 0.999557512718016, 0.9995609546695269, 0.9995643698589873, 0.9995677584942982, 0.9995711207817481, 0.9995744569260261, 0.9995777671302333, 0.9995810515958958, 0.9995843105229772, 0.9995875441098898, 0.9995907525535066, 0.9995939360491741, 0.9995970947907232, 0.9996002289704817, 0.9996033387792852, 0.9996064244064888, 0.9996094860399791, 0.9996125238661848, 0.9996155380700886, 0.9996185288352377, 0.9996214963437557, 0.999624440776353, 0.9996273623123381, 0.9996302611296282, 0.99963313740476, 0.9996359913129005, 0.9996388230278576, 0.9996416327220904, 0.9996444205667198, 0.9996471867315387, 0.999649931385023, 0.9996526546943404, 0.999655356825362, 0.9996580379426713, 0.9996606982095744, 0.9996633377881102}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softplus_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softplus_table.tb new file mode 100644 index 0000000000..79ab4dbca0 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softplus_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t softplus_table[1024] = {0.0003354063728956624, 0.0003406873550803617, 0.00034605147208270414, 0.0003515000321796832, 0.0003570343642216145, 0.0003626558179556205, 0.00036836576435416487, 0.00037416559594708143, 0.0003800567271613117, 0.00038604059466369, 0.0003921186577109891, 0.0003982923985041191, 0.0004045633225482515, 0.0004109329590188671, 0.0004174028611328379, 0.0004239746065267588, 0.0004306497976387561, 0.00043743006209931624, 0.0004443170531255857, 0.0004513124499229114, 
0.0004584179580931756, 0.00046563531004881247, 0.00047296626543294744, 0.00048041261154765414, 0.0004879761637865524, 0.0004956587660768489, 0.0005034622913257134, 0.0005113886418756467, 0.0005194397499659523, 0.0005276175782009742, 0.0005359241200277603, 0.0005443614002182646, 0.0005529314753607456, 0.00056163643435969, 0.0005704783989413742, 0.0005794595241683863, 0.0005885819989631052, 0.0005978480466376919, 0.0006072599254330265, 0.0006168199290670339, 0.0006265303872892831, 0.0006363936664467381, 0.0006464121700566619, 0.0006565883393893283, 0.0006669246540587639, 0.000677423632624396, 0.000688087833200052, 0.000698919854074298, 0.0007099223343392244, 0.000721097954529448, 0.0007324494372708803, 0.0007439795479401416, 0.0007556910953335041, 0.0007675869323469106, 0.0007796699566652858, 0.0007919431114651247, 0.0008044093861248053, 0.0008170718169488274, 0.0008299334879010886, 0.0008429975313509598, 0.0008562671288305988, 0.0008697455118034904, 0.0008834359624455369, 0.0008973418144382418, 0.0009114664537742015, 0.000925813319574447, 0.0009403859049191803, 0.0009551877576918916, 0.0009702224814344057, 0.000985493736218171, 0.0010010052395268958, 0.0010167607671529581, 0.0010327641541097987, 0.001049019295555397, 0.0010655301477316996, 0.0010823007289199843, 0.0010993351204077113, 0.001116637467472828, 0.0011342119803826373, 0.001152062935408532, 0.0011701946758545937, 0.0011886116131049056, 0.0012073182276849145, 0.0012263190703399192, 0.001245618763130227, 0.0012652220005429549, 0.001285133550621346, 0.0013053582561120174, 0.001325901035628126, 0.0013467668848317531, 0.0013679608776354848, 0.00138948816742006, 0.001411353988272717, 0.001433563656243772, 0.0014561225706227344, 0.0014790362152343772, 0.001502310159754285, 0.0015259500610442973, 0.0015499616645093688, 0.0015743508054751494, 0.0015991234105855849, 0.0016242854992240554, 0.0016498431849533595, 0.00167580267698116, 0.0017021702816453115, 0.0017289524039232446, 0.0017561555489635974, 
0.001783786323642486, 0.0018118514381423832, 0.0018403577075560003, 0.001869312053513794, 0.001898721505837939, 0.0019285932042193933, 0.0019589343999226604, 0.0019897524575148767, 0.002021054856622275, 0.0020528491937137463, 0.0020851431839098984, 0.002117944662821542, 0.0021512615884148848, 0.0021851020429064782, 0.0022194742346845343, 0.00225438650026252, 0.002289847306260104, 0.0023258652514147, 0.0023624490686246498, 0.0023996076270215313, 0.0024373499340750717, 0.002475685137730357, 0.0025146225285757125, 0.0025541715420447186, 0.0025943417606513874, 0.0026351429162579745, 0.002676584892379097, 0.002718677726518085, 0.0027614316125407776, 0.0028048569030831336, 0.0028489641119965273, 0.0028937639168299767, 0.002939267161347188, 0.0029854848580845154, 0.003032428190946177, 0.0030801085178368253, 0.0031285373733357844, 0.003177726471409947, 0.0032276877081680796, 0.0032784331646550737, 0.0033299751096895515, 0.00338232600274224, 0.0034354984968580755, 0.0034895054416212157, 0.0035443598861650237, 0.003600075082226414, 0.003656664487244397, 0.00371414176750642, 0.0037725208013405435, 0.003831815682353728, 0.0038920407227198005, 0.003953210456515133, 0.004015339643103613, 0.004078443270570686, 0.0041425365592096, 0.004207634965057619, 0.004273754183484201, 0.00434091015283269, 0.004409119058115038, 0.004478397334758896, 0.00454876167241298, 0.004620229018803844, 0.004692816583653071, 0.004766541842647062, 0.004841422541469125, 0.0049174766998860125, 0.00499472261589659, 0.005073178869938986, 0.005152864329159891, 0.005233798151743009, 0.005315999791303611, 0.005399489001343105, 0.005484285839769194, 0.005570410673479947, 0.005657884183015798, 0.005746727367274214, 0.005836961548296114, 0.005928608376116558, 0.0060216898336872, 0.006116228241867638, 0.006212246264487619, 0.006309766913480709, 0.0064088135540907335, 0.006509409910152201, 0.006611580069443769, 0.006715348489117967, 0.006820740001207251, 0.006927779818205848, 0.007036493538730301, 
0.007146907153260105, 0.00725904704995559, 0.0073729400205585814, 0.007488613266373843, 0.007606094404333926, 0.007725411473146704, 0.007846592939527523, 0.007969667704517625, 0.00809466510988807, 0.008221614944632333, 0.00835054745154541, 0.008481493333893905, 0.008614483762175515, 0.008749550380970798, 0.008886725315885627, 0.009026041180588841, 0.009167531083942195, 0.009311228637226806, 0.009457167961465755, 0.009605383694842864, 0.00975591100022136, 0.009908785572759312, 0.010064043647627727, 0.010221722007827632, 0.010381857992110645, 0.010544489503002403, 0.010709655014929037, 0.010877393582449336, 0.011047744848593777, 0.011220749053308582, 0.011396447042009006, 0.01157488027424076, 0.011756090832452602, 0.011940121430876954, 0.01212701542452431, 0.012316816818289951, 0.012509570276173372, 0.012705321130611907, 0.012904115391931047, 0.01310599975790885, 0.013311021623458399, 0.013519229090428419, 0.01373067097752129, 0.013945396830331734, 0.014163456931505055, 0.014384902311015907, 0.014609784756570059, 0.014838156824127785, 0.015070071848550779, 0.015305583954372405, 0.015544748066693015, 0.015787619922200864, 0.01603425608031871, 0.01628471393447507, 0.016539051723505158, 0.016797328543178114, 0.01705960435785183, 0.017325940012255757, 0.017596397243405684, 0.017871038692643677, 0.018149927917809776, 0.018433129405544584, 0.018720708583719547, 0.01901273183400041, 0.019309266504538583, 0.019610380922795604, 0.01991614440849685, 0.020226627286717296, 0.020541900901097004, 0.0208620376271882, 0.0211871108859329, 0.021517195157270103, 0.021852365993874717, 0.022192700035024944, 0.02253827502059903, 0.02288916980520317, 0.0232454643724251, 0.02360723984921647, 0.023974578520403956, 0.02434756384332458, 0.024726280462587746, 0.025110814224961593, 0.025501252194382894, 0.0258976826670897, 0.026300195186875368, 0.026708880560461664, 0.027123830872991465, 0.027545139503638134, 0.0279729011413303, 0.02840721180058971, 0.02884816883748234, 0.0292958709656785, 
0.02975041827262065, 0.030211912235796546, 0.03068045573911709, 0.031156153089394074, 0.03163911003291504, 0.03212943377211589, 0.03262723298234382, 0.033132617828712116, 0.03364569998303876, 0.03416659264087037, 0.0346954105385841, 0.0352322699705664, 0.035777288806463185, 0.03633058650849828, 0.0368922841488559, 0.037462504427122525, 0.038041371687783036, 0.03862901193776906, 0.03922555286404995, 0.03983112385126604, 0.04044585599939578, 0.04106988214145289, 0.04170333686120597, 0.04234635651091622, 0.04299907922908784, 0.043661644958220974, 0.04433419546256512, 0.04501687434586289, 0.0457098270690787, 0.04641320096810361, 0.04712714527143037, 0.04785181111779034, 0.04858735157374197, 0.04933392165120608, 0.050091678324936065, 0.050860780549916995, 0.051641389278681546, 0.05243366747853465, 0.053237780148677964, 0.05405389433722194, 0.05488217915807824, 0.05572280580771631, 0.05657594758178161, 0.05744177989155504, 0.058320480280248894, 0.05921222843912356, 0.0601172062234148, 0.06103559766805858, 0.061967589003198625, 0.06291336866946722, 0.0638731273330229, 0.0648470579003314, 0.06583535553267797, 0.0668382176603926, 0.06785584399677821, 0.06888843655172094, 0.06993619964497334, 0.07099933991908858, 0.07207806635199317, 0.07317259026918149, 0.07428312535551256, 0.07540988766659504, 0.07655309563974087, 0.07771297010447017, 0.07888973429254957, 0.0800836138475443, 0.08129483683386562, 0.08252363374529631, 0.08377023751297005, 0.08503488351278922, 0.08631780957225904, 0.0876192559767173, 0.08893946547493871, 0.09027868328409475, 0.09163715709404396, 0.09301513707093458, 0.09441287586009445, 0.09583062858818926, 0.09726865286462316, 0.09872720878216075, 0.10020655891674712, 0.10170696832650451, 0.10322870454987645, 0.10477203760290424, 0.1063372399756036, 0.10792458662742362, 0.1095343549817592, 0.11116682491949764, 0.11282227877156932, 0.11450100131048288, 0.11620327974081733, 0.11792940368864717, 0.11967966518987712, 0.12145435867745892, 0.12325378096746863, 
0.1250782312440172, 0.1269280110429724, 0.1288034242344639, 0.13070477700415062, 0.13263237783322407, 0.13458653747712504, 0.13656756894294728, 0.138575787465507, 0.1406115104820542, 0.1426750576056016, 0.14476675059684882, 0.14688691333468207, 0.14903587178522146, 0.1512139539694014, 0.1534214899290569, 0.1556588116914998, 0.15792625323256193, 0.16022415043808716, 0.1625528410638539, 0.16491266469390814, 0.167303962697292, 0.16972707818315066, 0.1721823559542, 0.17467014245854184, 0.177190785739814, 0.17974463538565896, 0.1823320424745012, 0.18495335952062164, 0.18760894041752013, 0.1902991403795559, 0.19302431588185892, 0.1957848245985066, 0.1985810253389593, 0.20141327798275246, 0.20428194341244124, 0.20718738344479998, 0.21012996076027193, 0.2131100388306759, 0.21612798184517223, 0.21918415463449092, 0.2222789225934327, 0.22541265160164792, 0.2285857079427062, 0.23179845822146725, 0.23505126927976808, 0.23834450811044117, 0.24167854176968054, 0.24505373728777813, 0.2484704615782499, 0.2519290813453729, 0.25542996299016474, 0.258973472514827, 0.26255997542568466, 0.26618983663465434, 0.2698634203592726, 0.2735810900213219, 0.27734320814409186, 0.28115013624831475, 0.2850022347468196, 0.2888998628379482, 0.2928433783977785, 0.2968331378712067, 0.3008694961619363, 0.3049528065214295, 0.30908342043687387, 0.3132616875182228, 0.31748795538436975, 0.32176256954851534, 0.32608587330279354, 0.33045820760222067, 0.33487991094803354, 0.3393513192704904, 0.34387276581119985, 0.3484445810050557, 0.35306709236185063, 0.3577406243476442, 0.3624654982659652, 0.3672420321389269, 0.37207054058833766, 0.3769513347168864, 0.38188472198949264, 0.3868710061149, 0.39191048692760483, 0.3970034602702044, 0.4021502178762545, 0.40735104725372645, 0.41260623156915227, 0.41791604953255035, 0.4232807752832234, 0.42870067827651864, 0.43417602317164605, 0.43970706972064355, 0.4452940726585858, 0.45093728159512503, 0.45663694090746143, 0.4623932896348319, 0.46820656137460925, 
0.4740769841801067, 0.4800047804601745, 0.4859901668806822, 0.4920333542679718, 0.49813454751437525, 0.5042939454858787, 0.5105117409320202, 0.5167881203981086, 0.52312326413984, 0.5295173460403996, 0.5359705335301227, 0.5424829875087954, 0.5490548622706684, 0.5556863054322577, 0.5623774578630043, 0.5691284536188559, 0.5759394198788436, 0.5828104768847107, 0.5897417378836574, 0.596733309074256, 0.6037852895555947, 0.6108977712796968, 0.618070839007268, 0.6253045702668136, 0.632599035317169, 0.6399542971134814, 0.647370411276677, 0.6548474260664466, 0.6623853823577756, 0.6699843136210436, 0.6776442459057149, 0.6853651978276345, 0.6931471805599453, 0.7009901978276344, 0.7088942459057148, 0.7168593136210436, 0.7248853823577753, 0.7329724260664465, 0.741120411276677, 0.7493292971134814, 0.757599035317169, 0.7659295702668136, 0.7743208390072679, 0.782772771279697, 0.7912852895555946, 0.799858309074256, 0.8084917378836574, 0.8171854768847107, 0.8259394198788435, 0.8347534536188559, 0.8436274578630043, 0.8525613054322577, 0.861554862270668, 0.8706079875087953, 0.8797205335301228, 0.8888923460403997, 0.8981232641398401, 0.9074131203981086, 0.9167617409320203, 0.9261689454858786, 0.9356345475143754, 0.9451583542679718, 0.9547401668806821, 0.9643797804601746, 0.9740769841801067, 0.9838315613746091, 0.9936432896348317, 1.0035119409074615, 1.013437281595125, 1.0234190726585857, 1.0334570697206433, 1.043551023171646, 1.0537006782765186, 1.0639057752832233, 1.0741660495325505, 1.0844812315691523, 1.0948510472537265, 1.1052752178762544, 1.1157534602702044, 1.1262854869276047, 1.1368710061148999, 1.1475097219894925, 1.1582013347168862, 1.1689455405883376, 1.179742032138927, 1.190590498265965, 1.2014906243476442, 1.2124420923618506, 1.2234445810050556, 1.2344977658111997, 1.2456013192704902, 1.2567549109480336, 1.2679582076022207, 1.2792108733027936, 1.2905125695485151, 1.3018629553843697, 1.3132616875182228, 1.3247084204368738, 1.3362028065214295, 1.3477444961619365, 
1.3593331378712066, 1.3709683783977786, 1.3826498628379482, 1.3943772347468197, 1.4061501362483146, 1.4179682081440919, 1.4298310900213218, 1.4417384203592727, 1.4536898366346542, 1.4656849754256847, 1.477723472514827, 1.4898049629901648, 1.501929081345373, 1.5140954615782498, 1.5263037372877781, 1.5385535417696805, 1.5508445081104412, 1.5631762692797682, 1.5755484582214672, 1.587960707942706, 1.6004126516016477, 1.6129039225934327, 1.625434154634491, 1.638002981845172, 1.650610038830676, 1.6632549607602718, 1.6759373834448001, 1.6886569434124414, 1.7014132779827524, 1.7142060253389595, 1.7270348245985065, 1.7398993158818588, 1.7527991403795558, 1.76573394041752, 1.7787033595206214, 1.791707042474501, 1.804744635385659, 1.817815785739814, 1.8309201424585417, 1.8440573559542, 1.8572270781831508, 1.870428962697292, 1.8836626646939079, 1.896927841063854, 1.9102241504380872, 1.9235512532325618, 1.9369088116914996, 1.950296489929057, 1.9637139539694013, 1.9771608717852214, 1.990636913334682, 2.004141750596849, 2.0176750576056013, 2.0312365104820542, 2.044825787465507, 2.0584425689429473, 2.072086537477125, 2.085757377833224, 2.0994547770041505, 2.113178424234464, 2.1269280110429722, 2.1407032312440175, 2.1545037809674685, 2.1683293586774592, 2.182179665189877, 2.196054403688647, 2.2099532797408172, 2.2238760013104826, 2.2378222787715694, 2.2517918249194975, 2.265784354981759, 2.2797995866274237, 2.2938372399756037, 2.3078970376029044, 2.3219787045498763, 2.3360819683265044, 2.350206558916747, 2.3643522087821607, 2.378518652864623, 2.3927056285881894, 2.4069128758600944, 2.4211401370709345, 2.435387157094044, 2.4496536832840947, 2.4639394654749385, 2.478244255976717, 2.4925678095722588, 2.506909883512789, 2.52127023751297, 2.5356486337452964, 2.5500448368338655, 2.564458613847544, 2.5788897342925496, 2.59333797010447, 2.6078030956397407, 2.622284887666595, 2.6367831253555125, 2.6512975902691815, 2.665828066351993, 2.6803743399190885, 2.694936199644973, 2.709513436551721, 
2.724105843996778, 2.7387132176603926, 2.753335355532678, 2.7679720579003315, 2.782623127333023, 2.797288368669467, 2.8119675890031988, 2.8266605976680585, 2.8413672062234148, 2.856087228439123, 2.8708204802802486, 2.8855667798915547, 2.9003259475817815, 2.9150978058077164, 2.9298821791580782, 2.944678894337222, 2.959487780148678, 2.9743086674785344, 2.9891413892786813, 3.003985780549917, 3.018841678324936, 3.033708921651206, 3.048587351573742, 3.0634768111177904, 3.07837714527143, 3.0932882009681033, 3.1082098270690786, 3.1231418743458628, 3.138084195462565, 3.153036644958221, 3.1679990792290877, 3.182971356510916, 3.197953336861206, 3.212944882141453, 3.227945855999396, 3.242956123851266, 3.2579755528640497, 3.273004011937769, 3.288041371687783, 3.3030875044271224, 3.318142284148856, 3.333205586508498, 3.348277288806463, 3.363357269970566, 3.3784454105385837, 3.39354159264087, 3.4086456999830386, 3.423757617828712, 3.438877232982344, 3.454004433772116, 3.469139110032915, 3.484281153089394, 3.499430455739117, 3.5145869122357962, 3.5297504182726205, 3.5449208709656785, 3.560098168837482, 3.5752822118005896, 3.59047290114133, 3.605670139503638, 3.6208738308729913, 3.6360838805604616, 3.651300195186875, 3.6665226826670896, 3.681751252194383, 3.6969858142249614, 3.7122262804625876, 3.7274725638433246, 3.742724578520404, 3.757982239849216, 3.773245464372425, 3.788514169805203, 3.8037882750205987, 3.8190677000350246, 3.8343523659938747, 3.84964219515727, 3.8649371108859327, 3.880237037627188, 3.8955419009010965, 3.9108516272867173, 3.9261661444084965, 3.941485380922795, 3.9568092665045387, 3.9721377318340005, 3.9874707085837193, 4.002808129405544, 4.0181499279178094, 4.033496038692643, 4.0488463972434054, 4.064200940012256, 4.0795596043578515, 4.094922328543178, 4.110289051723505, 4.125659713934475, 4.141034256080319, 4.156412619922201, 4.171794748066693, 4.1871805839543725, 4.202570071848551, 4.217963156824128, 4.23335978475657, 4.248759902311016, 4.264163456931505, 
4.279570396830332, 4.294980670977521, 4.310394229090428, 4.325811021623458, 4.341230999757909, 4.356654115391931, 4.372080321130612, 4.3875095702761735, 4.40294181681829, 4.418377015424524, 4.433815121430877, 4.449256090832453, 4.464699880274241, 4.480146447042008, 4.495595749053308, 4.511047744848594, 4.526502393582449, 4.541959655014929, 4.557419489503002, 4.572881857992111, 4.588346722007827, 4.603814043647628, 4.619283785572759, 4.634755911000221, 4.650230383694843, 4.6657071679614655, 4.681186228637227, 4.696667531083942, 4.712151041180589, 4.727636725315886, 4.743124550380971, 4.758614483762176, 4.7741064933338935, 4.7896005474515455, 4.805096614944632, 4.820594665109888, 4.836094667704518, 4.8515965929395275, 4.867100411473147, 4.882606094404334, 4.898113613266374, 4.913622940020558, 4.929134047049955, 4.94464690715326, 4.96016149353873, 4.975677779818206, 4.991195740001207, 5.006715348489118, 5.0222365800694435, 5.037759409910152, 5.05328381355409, 5.06880976691348, 5.084337246264488, 5.099866228241868, 5.115396689833687, 5.1309286083761165, 5.146461961548296, 5.161996727367274, 5.177532884183016, 5.19307041067348, 5.208609285839769, 5.224149489001343, 5.2396909997913035, 5.2552337981517425, 5.27077786432916, 5.286323178869939, 5.301869722615896, 5.317417476699886, 5.332966422541469, 5.348516541842647, 5.364067816583653, 5.379620229018804, 5.395173761672413, 5.410728397334759, 5.426284119058114, 5.441840910152832, 5.457398754183484, 5.4729576349650575, 5.488517536559209, 5.50407844327057, 5.519640339643104, 5.535203210456515, 5.55076704072272, 5.566331815682354, 5.58189752080134, 5.597464141767507, 5.613031664487244, 5.628600075082226, 5.644169359886165, 5.659739505441621, 5.675310498496858, 5.690882326002742, 5.70645497510969, 5.722028433164655, 5.737602687708168, 5.75317772647141, 5.768753537373335, 5.784330108517836, 5.799907428190946, 5.815485484858084, 5.831064267161347, 5.84664376391683, 5.862223964111997, 5.877804856903083, 5.893386431612541, 
5.908968677726518, 5.924551584892379, 5.940135142916258, 5.955719341760651, 5.9713041715420445, 5.986889622528576, 6.00247568513773, 6.018062349934075, 6.033649607627021, 6.049237449068625, 6.064825865251414, 6.08041484730626, 6.096004386500263, 6.111594474234685, 6.1271851020429065, 6.1427762615884145, 6.158367944662821, 6.173960143183909, 6.1895528491937135, 6.205146054856622, 6.220739752457515, 6.236333934399923, 6.2519285932042195, 6.267523721505838, 6.283119312053514, 6.298715357707556, 6.314311851438142, 6.329908786323642, 6.345506155548963, 6.361103952403923, 6.3767021702816455, 6.3923008026769805, 6.407899843184953, 6.423499285499224, 6.439099123410585, 6.454699350805475, 6.470299961664509, 6.485900950061044, 6.501502310159754, 6.517104036215234, 6.532706122570622, 6.548308563656243, 6.5639113539882725, 6.57951448816742, 6.595117960877635, 6.610721766884832, 6.626325901035628, 6.641930358256112, 6.657535133550621, 6.6731402220005425, 6.68874561876313, 6.70435131907034, 6.719957318227685, 6.735563611613105, 6.751170194675854, 6.766777062935408, 6.782384211980382, 6.797991637467472, 6.813599335120408, 6.829207300728919, 6.844815530147732, 6.8604240192955555, 6.876032764154109, 6.891641760767153, 6.907251005239527, 6.922860493736218, 6.938470222481434, 6.954080187757691, 6.969690385904919, 6.985300813319574, 7.000911466453774, 7.016522341814438, 7.032133435962446, 7.047744745511803, 7.06335626712883, 7.07896799753135, 7.094579933487901, 7.1101920718169485, 7.125804409386125, 7.141416943111465, 7.157029669956665, 7.172642586932347, 7.188255691095334, 7.20386897954794, 7.219482449437271, 7.235096097954529, 7.250709922334339, 7.266323919854074, 7.2819380878332, 7.297552423632625, 7.313166924654059, 7.328781588339389, 7.344396412170056, 7.360011393666446, 7.375626530387289, 7.391241819929067, 7.406857259925433, 7.422472848046637, 7.438088581998962, 7.453704459524168, 7.469320478398941, 7.48493663643436, 7.500552931475361, 7.516169361400218, 7.531785924120028, 
7.547402617578201, 7.563019439749966, 7.5786363886418755, 7.594253462291325, 7.609870658766077, 7.625487976163787, 7.641105412611547, 7.656722966265432, 7.672340635310048, 7.687958417958093, 7.703576312449923, 7.719194317053125, 7.7348124300620995, 7.7504306497976385, 7.766048974606527, 7.781667402861133, 7.797285932959019, 7.812904563322548, 7.828523292398504, 7.844142118657711, 7.859761040594663, 7.875380056727161, 7.890999165595947, 7.906618365764354, 7.9222376558179555, 7.937857034364221, 7.953476500032179, 7.969096051472082, 7.98471568735508}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softsign_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softsign_table.tb new file mode 100644 index 0000000000..cd3093024d --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/softsign_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t softsign_table[1024] = {0.0038910505836575876, 0.011583011583011582, 0.019157088122605363, 0.026615969581749048, 0.033962264150943396, 0.04119850187265917, 0.048327137546468404, 0.055350553505535055, 0.06227106227106227, 0.06909090909090909, 0.07581227436823104, 0.08243727598566308, 0.08896797153024912, 0.09540636042402827, 0.10175438596491228, 0.10801393728222997, 0.11418685121107267, 0.12027491408934708, 0.12627986348122866, 0.13220338983050847, 0.13804713804713806, 0.14381270903010032, 0.14950166112956811, 0.1551155115511551, 0.16065573770491803, 0.16612377850162866, 0.1715210355987055, 0.17684887459807075, 0.18210862619808307, 0.1873015873015873, 0.19242902208201892, 0.1974921630094044, 0.20249221183800623, 0.20743034055727555, 0.2123076923076923, 0.21712538226299694, 0.22188449848024316, 0.22658610271903323, 0.23123123123123124, 0.23582089552238805, 0.2403560830860534, 0.2448377581120944, 0.24926686217008798, 0.2536443148688047, 0.2579710144927536, 0.2622478386167147, 0.2664756446991404, 0.2706552706552707, 
0.2747875354107649, 0.27887323943661974, 0.28291316526610644, 0.28690807799442897, 0.29085872576177285, 0.29476584022038566, 0.29863013698630136, 0.3024523160762943, 0.3062330623306233, 0.30997304582210244, 0.3136729222520107, 0.31733333333333336, 0.3209549071618037, 0.3245382585751979, 0.32808398950131235, 0.33159268929503916, 0.33506493506493507, 0.3385012919896641, 0.34190231362467866, 0.3452685421994885, 0.3486005089058524, 0.3518987341772152, 0.35516372795969775, 0.3583959899749373, 0.36159600997506236, 0.36476426799007444, 0.36790123456790125, 0.371007371007371, 0.3740831295843521, 0.3771289537712895, 0.3801452784503632, 0.38313253012048193, 0.38609112709832133, 0.38902147971360385, 0.3919239904988123, 0.3947990543735225, 0.3976470588235294, 0.40046838407494145, 0.40326340326340326, 0.4060324825986079, 0.40877598152424943, 0.4114942528735632, 0.41418764302059496, 0.4168564920273349, 0.41950113378684806, 0.4221218961625282, 0.4247191011235955, 0.4272930648769575, 0.4298440979955457, 0.43237250554323725, 0.434878587196468, 0.43736263736263736, 0.43982494529540483, 0.4422657952069717, 0.44468546637744033, 0.4470842332613391, 0.44946236559139785, 0.4518201284796574, 0.4541577825159915, 0.4564755838641189, 0.4587737843551797, 0.4610526315789474, 0.46331236897274636, 0.46555323590814196, 0.4677754677754678, 0.4699792960662526, 0.47216494845360824, 0.47433264887063653, 0.47648261758691207, 0.4786150712830957, 0.48073022312373226, 0.48282828282828283, 0.48490945674044267, 0.48697394789579157, 0.48902195608782434, 0.49105367793240556, 0.49306930693069306, 0.49506903353057197, 0.49705304518664045, 0.49902152641878667, 0.5009746588693957, 0.5029126213592233, 0.504835589941973, 0.5067437379576107, 0.508637236084453, 0.5105162523900574, 0.5123809523809524, 0.5142314990512334, 0.5160680529300568, 0.5178907721280602, 0.5196998123827392, 0.5214953271028038, 0.5232774674115456, 0.5250463821892394, 0.5268022181146026, 0.5285451197053407, 0.5302752293577981, 0.5319926873857403, 
0.5336976320582878, 0.5353901996370236, 0.5370705244122965, 0.5387387387387388, 0.5403949730700179, 0.5420393559928444, 0.5436720142602496, 0.5452930728241563, 0.5469026548672566, 0.5485008818342152, 0.5500878734622144, 0.5516637478108581, 0.5532286212914486, 0.5547826086956522, 0.5563258232235702, 0.5578583765112263, 0.5593803786574871, 0.5608919382504288, 0.5623931623931624, 0.5638841567291312, 0.565365025466893, 0.5668358714043993, 0.5682967959527825, 0.5697478991596638, 0.5711892797319933, 0.5726210350584308, 0.5740432612312812, 0.5754560530679934, 0.5768595041322314, 0.5782537067545305, 0.5796387520525451, 0.5810147299509002, 0.5823817292006526, 0.583739837398374, 0.5850891410048622, 0.5864297253634895, 0.5877616747181964, 0.5890850722311396, 0.5904, 0.5917065390749602, 0.5930047694753577, 0.5942947702060222, 0.5955766192733017, 0.5968503937007874, 0.598116169544741, 0.5993740219092332, 0.6006240249609984, 0.6018662519440124, 0.6031007751937985, 0.6043276661514683, 0.6055469953775039, 0.6067588325652842, 0.6079632465543645, 0.6091603053435114, 0.6103500761035008, 0.6115326251896813, 0.6127080181543116, 0.6138763197586727, 0.6150375939849624, 0.616191904047976, 0.617339312406577, 0.6184798807749627, 0.6196136701337296, 0.6207407407407407, 0.621861152141802, 0.6229749631811488, 0.6240822320117474, 0.6251830161054173, 0.6262773722627737, 0.6273653566229985, 0.6284470246734397, 0.6295224312590448, 0.6305916305916306, 0.6316546762589929, 0.6327116212338594, 0.6337625178826896, 0.6348074179743224, 0.635846372688478, 0.6368794326241135, 0.637906647807638, 0.6389280677009873, 0.639943741209564, 0.6409537166900421, 0.641958041958042, 0.6429567642956764, 0.6439499304589708, 0.6449375866851595, 0.6459197786998617, 0.646896551724138, 0.6478679504814305, 0.6488340192043895, 0.6497948016415869, 0.6507503410641201, 0.6517006802721088, 0.6526458616010855, 0.6535859269282814, 0.6545209176788124, 0.6554508748317631, 0.6563758389261745, 0.6572958500669344, 0.6582109479305741, 
0.6591211717709721, 0.6600265604249668, 0.6609271523178808, 0.6618229854689565, 0.6627140974967062, 0.6636005256241787, 0.6644823066841415, 0.665359477124183, 0.666232073011734, 0.6671001300390117, 0.6679636835278858, 0.6688227684346701, 0.6696774193548387, 0.6705276705276705, 0.6713735558408216, 0.6722151088348272, 0.6730523627075351, 0.6738853503184713, 0.6747141041931385, 0.6755386565272496, 0.6763590391908976, 0.6771752837326608, 0.6779874213836478, 0.6787954830614805, 0.6795994993742178, 0.6803995006242197, 0.6811955168119551, 0.6819875776397516, 0.6827757125154895, 0.6835599505562423, 0.6843403205918619, 0.6851168511685117, 0.6858895705521473, 0.6866585067319462, 0.6874236874236874, 0.6881851400730816, 0.6889428918590522, 0.6896969696969697, 0.6904474002418379, 0.6911942098914354, 0.6919374247894103, 0.6926770708283313, 0.6934131736526946, 0.6941457586618877, 0.6948748510131109, 0.6956004756242569, 0.6963226571767497, 0.6970414201183432, 0.6977567886658795, 0.6984687868080094, 0.699177438307873, 0.6998827667057445, 0.7005847953216374, 0.7012835472578763, 0.7019790454016298, 0.70267131242741, 0.7033603707995365, 0.7040462427745665, 0.7047289504036909, 0.7054085155350979, 0.7060849598163031, 0.7067583046964491, 0.7074285714285714, 0.7080957810718358, 0.7087599544937428, 0.7094211123723042, 0.710079275198188, 0.7107344632768362, 0.7113866967305524, 0.7120359955005624, 0.712682379349046, 0.7133258678611423, 0.7139664804469273, 0.7146042363433668, 0.7152391546162402, 0.7158712541620422, 0.716500553709856, 0.7171270718232045, 0.7177508269018743, 0.7183718371837183, 0.7189901207464325, 0.71960569550931, 0.7202185792349727, 0.7208287895310797, 0.721436343852013, 0.7220412595005429, 0.7226435536294691, 0.7232432432432433, 0.7238403451995685, 0.7244348762109796, 0.7250268528464017, 0.7256162915326902, 0.7262032085561497, 0.7267876200640342, 0.7273695420660277, 0.7279489904357067, 0.7285259809119831, 0.7291005291005291, 0.7296726504751848, 0.7302423603793466, 
0.7308096740273397, 0.7313746065057712, 0.7319371727748691, 0.7324973876698014, 0.7330552659019812, 0.7336108220603538, 0.7341640706126688, 0.7347150259067358, 0.7352637021716649, 0.7358101135190919, 0.7363542739443872, 0.736896197327852, 0.7374358974358974, 0.7379733879222109, 0.7385086823289071, 0.7390417940876657, 0.7395727365208545, 0.7401015228426396, 0.7406281661600811, 0.7411526794742164, 0.7416750756811302, 0.7421953675730111, 0.742713567839196, 0.7432296890672017, 0.7437437437437437, 0.7442557442557443, 0.744765702891326, 0.745273631840796, 0.7457795431976166, 0.7462834489593657, 0.7467853610286844, 0.7472852912142152, 0.7477832512315271, 0.7482792527040315, 0.7487733071638861, 0.7492654260528894, 0.7497556207233627, 0.7502439024390244, 0.750730282375852, 0.7512147716229349, 0.7516973811833172, 0.7521781219748306, 0.7526570048309179, 0.7531340405014465, 0.753609239653513, 0.7540826128722382, 0.7545541706615532, 0.755023923444976, 0.7554918815663801, 0.7559580552907531, 0.7564224548049476, 0.7568850902184235, 0.7573459715639811, 0.7578051087984863, 0.7582625118035883, 0.7587181903864278, 0.7591721542803387, 0.7596244131455399, 0.7600749765698219, 0.7605238540692236, 0.7609710550887021, 0.7614165890027959, 0.761860465116279, 0.7623026926648097, 0.76274328081557, 0.7631822386679001, 0.7636195752539243, 0.7640552995391705, 0.764489420423183, 0.7649219467401286, 0.7653528872593951, 0.7657822506861848, 0.7662100456621005, 0.7666362807657247, 0.7670609645131938, 0.7674841053587648, 0.7679057116953762, 0.7683257918552037, 0.7687443541102078, 0.7691614066726781, 0.7695769576957696, 0.7699910152740341, 0.7704035874439462, 0.7708146821844225, 0.7712243074173369, 0.7716324710080286, 0.7720391807658059, 0.7724444444444445, 0.7728482697426797, 0.7732506643046945, 0.7736516357206012, 0.7740511915269197, 0.7744493392070485, 0.7748460861917327, 0.7752414398595259, 0.775635407537248, 0.7760279965004374, 0.7764192139737991, 0.7768090671316478, 0.7771975630983464, 
0.7775847089487402, 0.7779705117085863, 0.7783549783549784, 0.7787381158167676, 0.7791199309749784, 0.7795004306632214, 0.7798796216680998, 0.7802575107296137, 0.7806341045415596, 0.7810094097519247, 0.7813834329632793, 0.7817561807331628, 0.7821276595744681, 0.7824978759558199, 0.7828668363019508, 0.7832345469940728, 0.7836010143702451, 0.7839662447257384, 0.7843302443133952, 0.7846930193439865, 0.7850545759865659, 0.7854149203688181, 0.7857740585774059, 0.7861319966583125, 0.786488740617181, 0.7868442964196503, 0.7871986699916874, 0.787551867219917, 0.7879038939519469, 0.7882547559966915, 0.7886044591246903, 0.7889530090684254, 0.7893004115226337, 0.7896466721446179, 0.7899917965545529, 0.7903357903357904, 0.7906786590351594, 0.7910204081632654, 0.7913610431947841, 0.7917005695687551, 0.7920389926888708, 0.7923763179237632, 0.7927125506072874, 0.7930476960388035, 0.7933817594834544, 0.7937147461724415, 0.7940466613032985, 0.7943775100401607, 0.7947072975140337, 0.7950360288230585, 0.7953637090327738, 0.7956903431763767, 0.7960159362549801, 0.7963404932378679, 0.7966640190627482, 0.7969865186360032, 0.7973079968329374, 0.7976284584980237, 0.797947908445146, 0.7982663514578409, 0.7985837922895358, 0.7989002356637863, 0.7992156862745098, 0.7995301487862176, 0.7998436278342455, 0.8001561280249805, 0.8004676539360873, 0.8007782101167316, 0.8010878010878011, 0.8013964313421257, 0.801704105344694, 0.8020108275328693, 0.8023166023166023, 0.8026214340786431, 0.8029253271747498, 0.8032282859338971, 0.8035303146584805, 0.803831417624521, 0.8041315990818668, 0.8044308632543926, 0.8047292143401983, 0.805026656511805, 0.8053231939163498, 0.8056188306757783, 0.8059135708870356, 0.8062074186222559, 0.8065003779289494, 0.8067924528301886, 0.8070836473247928, 0.8073739653875094, 0.8076634109691961, 0.8079519879969993, 0.8082397003745319, 0.8085265519820494, 0.8088125466766244, 0.8090976882923192, 0.8093819806403574, 0.8096654275092937, 0.8099480326651819, 0.810229799851742, 
0.8105107327905255, 0.8107908351810791, 0.811070110701107, 0.8113485630066323, 0.811626195732156, 0.8119030124908155, 0.8121790168745414, 0.8124542124542125, 0.8127286027798098, 0.8130021913805697, 0.8132749817651349, 0.8135469774217043, 0.8138181818181818, 0.8140885984023238, 0.8143582306018854, 0.8146270818247646, 0.8148951554591468, 0.8151624548736462, 0.8154289834174477, 0.8156947444204463, 0.8159597411933861, 0.8162239770279971, 0.8164874551971326, 0.8167501789549033, 0.8170121515368121, 0.8172733761598858, 0.8175338560228083, 0.8177935943060498, 0.8180525941719972, 0.8183108587650816, 0.8185683912119065, 0.8188251946213729, 0.8190812720848056, 0.8193366266760762, 0.8195912614517266, 0.8198451794510908, 0.8200983836964161, 0.8203508771929825, 0.8206026629292221, 0.820853743876837, 0.8211041229909154, 0.8213538032100488, 0.821602787456446, 0.8218510786360473, 0.8220986796386379, 0.8223455933379598, 0.8225918225918226, 0.8228373702422145, 0.8230822391154112, 0.8233264320220842, 0.8235699517574087, 0.82381280110117, 0.8240549828178694, 0.824296499656829, 0.8245373543522961, 0.8247775496235455, 0.8250170881749829, 0.8252559726962457, 0.825494205862304, 0.8257317903335603, 0.8259687287559483, 0.8262050237610319, 0.8264406779661017, 0.8266756939742722, 0.8269100743745774, 0.8271438217420661, 0.8273769386378962, 0.8276094276094276, 0.827841291190316, 0.8280725319006045, 0.8283031522468142, 0.8285331547220361, 0.82876254180602, 0.8289913159652639, 0.829219479653102, 0.8294470353097935, 0.8296739853626082, 0.8299003322259136, 0.8301260783012607, 0.8303512259774686, 0.8305757776307081, 0.8307997356245869, 0.831023102310231, 0.8312458800263678, 0.8314680710994075, 0.831689677843524, 0.8319107025607354, 0.8321311475409836, 0.8323510150622135, 0.8325703073904512, 0.8327890267798824, 0.8330071754729289, 0.8332247557003257, 0.8334417696811971, 0.8336582196231319, 0.8338741077222582, 0.8340894361633182, 0.8343042071197411, 0.8345184227537169, 0.8347320852162685, 
0.8349451966473244, 0.8351577591757888, 0.8353697749196142, 0.8355812459858702, 0.8357921744708147, 0.8360025624599615, 0.836212412028151, 0.8364217252396167, 0.8366305041480536, 0.8368387507966858, 0.8370464672183323, 0.8372536554354736, 0.8374603174603175, 0.8376664552948636, 0.8378720709309689, 0.8380771663504112, 0.8382817435249527, 0.8384858044164037, 0.8386893509766856, 0.8388923851478918, 0.8390949088623507, 0.8392969240426867, 0.8394984326018808, 0.8396994364433312, 0.8398999374609131, 0.8400999375390381, 0.8402994385527136, 0.8404984423676013, 0.8406969508400747, 0.8408949658172779, 0.8410924891371818, 0.8412895226286423, 0.8414860681114551, 0.8416821273964131, 0.8418777022853613, 0.8420727945712523, 0.8422674060382008, 0.8424615384615385, 0.8426551936078672, 0.8428483732351135, 0.8430410790925812, 0.8432333129210043, 0.8434250764525993, 0.8436163714111179, 0.8438071995118975, 0.8439975624619135, 0.8441874619598296, 0.8443768996960487, 0.8445658773527626, 0.8447543966040024, 0.8449424591156874, 0.8451300665456746, 0.8453172205438066, 0.8455039227519614, 0.8456901748040988, 0.8458759783263095, 0.8460613349368611, 0.8462462462462462, 0.8464307138572286, 0.8466147393648892, 0.8467983243566727, 0.8469814704124328, 0.8471641791044776, 0.8473464519976148, 0.847528290649196, 0.8477096966091612, 0.8478906714200832, 0.8480712166172106, 0.8482513337285121, 0.8484310242747187, 0.8486102897693673, 0.8487891317188423, 0.8489675516224189, 0.849145550972304, 0.8493231312536786, 0.8495002939447384, 0.8496770405167352, 0.8498533724340176, 0.8500292911540714, 0.85020479812756, 0.8503798947983635, 0.8505545826036194, 0.8507288629737609, 0.8509027373325568, 0.8510762070971495, 0.8512492736780941, 0.8514219384793964, 0.8515942028985507, 0.8517660683265779, 0.8519375361480624, 0.85210860774119, 0.8522792844777842, 0.8524495677233429, 0.8526194588370755, 0.8527889591719379, 0.8529580700746697, 0.853126792885829, 0.8532951289398281, 0.8534630795649685, 0.8536306460834763, 
0.8537978298115363, 0.8539646320593268, 0.8541310541310542, 0.8542970973249858, 0.8544627629334849, 0.8546280522430437, 0.8547929665343165, 0.854957507082153, 0.855121675155631, 0.8552854720180894, 0.8554488989271598, 0.8556119571347998, 0.8557746478873239, 0.8559369724254361, 0.8560989319842608, 0.8562605277933745, 0.8564217610768368, 0.8565826330532212, 0.8567431449356463, 0.8569032979318055, 0.8570630932439978, 0.8572225320691579, 0.8573816155988858, 0.857540345019477, 0.857698721511951, 0.8578567462520822, 0.8580144204104271, 0.8581717451523546, 0.8583287216380742, 0.8584853510226644, 0.8586416344561016, 0.8587975730832874, 0.8589531680440772, 0.8591084204733076, 0.8592633315008247, 0.8594179022515102, 0.85957213384531, 0.8597260273972602, 0.8598795840175151, 0.8600328048113723, 0.860185690879301, 0.8603382433169667, 0.8604904632152589, 0.8606423516603158, 0.8607939097335509, 0.8609451385116784, 0.861096039066739, 0.8612466124661247, 0.8613968597726043, 0.8615467820443483, 0.8616963803349541, 0.86184565569347, 0.8619946091644205, 0.8621432417878299, 0.8622915545992469, 0.8624395486297689, 0.8625872249060654, 0.8627345844504022, 0.8628816282806642, 0.8630283574103799, 0.863174772848744, 0.8633208756006406, 0.8634666666666667, 0.8636121470431539, 0.8637573177221927, 0.8639021796916534, 0.8640467339352098, 0.8641909814323607, 0.8643349231584526, 0.8644785600847009, 0.8646218931782126, 0.8647649234020074, 0.8649076517150396, 0.8650500790722193, 0.865192206424434, 0.8653340347185692, 0.8654755648975302, 0.8656167979002625, 0.8657577346617724, 0.8658983761131482, 0.8660387231815804, 0.8661787767903816, 0.8663185378590078, 0.8664580073030778, 0.866597186034393, 0.8667360749609578, 0.8668746749869994, 0.867012987012987, 0.8671510119356513, 0.8672887506480041, 0.8674262040393579, 0.867563372995344, 0.8677002583979329, 0.8678368611254518, 0.8679731820526044, 0.8681092220504895, 0.8682449819866186, 0.8683804627249357, 0.8685156651258347, 0.8686505900461775, 
0.8687852383393132, 0.8689196108550947, 0.8690537084398977, 0.8691875319366377, 0.8693210821847882, 0.8694543600203978, 0.869587366276108, 0.8697201017811704, 0.8698525673614642, 0.8699847638395124, 0.8701166920345003, 0.870248352762291, 0.8703797468354431, 0.8705108750632271, 0.8706417382516423, 0.8707723372034326, 0.8709026727181038, 0.8710327455919396, 0.8711625566180171, 0.8712921065862242, 0.8714213962832748, 0.8715504264927245, 0.8716791979949875, 0.871807711567351, 0.871935967983992, 0.872063968015992, 0.872191712431353, 0.8723192019950124, 0.872446437468859, 0.8725734196117472, 0.8727001491795127, 0.8728266269249876, 0.8729528535980149, 0.8730788299454636, 0.8732045567112432, 0.8733300346363186, 0.8734552644587247, 0.8735802469135803, 0.8737049827331032, 0.8738294726466239, 0.8739537173806007, 0.8740777176586325, 0.8742014742014742, 0.8743249877270496, 0.874448258950466, 0.8745712885840274, 0.8746940773372491, 0.8748166259168704, 0.8749389350268686, 0.8750610053684724, 0.8751828376401756, 0.8753044325377496, 0.8754257907542579, 0.875546912980068, 0.8756677999028655, 0.8757884522076662, 0.8759088705768299, 0.8760290556900726, 0.8761490082244799, 0.8762687288545191, 0.8763882182520522, 0.8765074770863482, 0.8766265060240964, 0.8767453057294174, 0.8768638768638769, 0.8769822200864968, 0.8771003360537686, 0.8772182254196643, 0.8773358888356493, 0.8774533269506941, 0.8775705404112865, 0.8776875298614429, 0.8778042959427208, 0.8779208392942298, 0.8780371605526441, 0.8781532603522132, 0.8782691393247741, 0.8783847980997624, 0.878500237304224, 0.878615457562826, 0.8787304594978683, 0.8788452437292948, 0.8789598108747045, 0.8790741615493624, 0.8791882963662104, 0.8793022159358793, 0.8794159208666981, 0.8795294117647059, 0.8796426892336624, 0.8797557538750587, 0.8798686062881277, 0.8799812470698547, 0.8800936768149883, 0.8802058961160505, 0.8803179055633473, 0.880429705744979, 0.8805412972468503, 0.8806526806526807, 0.8807638565440149, 0.8808748255002327, 
0.8809855880985588, 0.8810961449140734, 0.8812064965197216, 0.8813166434863235, 0.8814265863825845, 0.8815363257751041, 0.8816458622283865, 0.8817551963048499, 0.8818643285648362, 0.8819732595666205, 0.8820819898664211, 0.8821905200184077, 0.8822988505747127, 0.8824069820854387, 0.8825149150986691, 0.8826226501604768, 0.8827301878149336, 0.882837528604119, 0.8829446730681298, 0.8830516217450891, 0.8831583751711547, 0.883264933880529, 0.883371298405467, 0.8834774692762859, 0.8835834470213734, 0.8836892321671967, 0.8837948252383114, 0.8839002267573696, 0.8840054372451291, 0.8841104572204618, 0.8842152872003618, 0.8843199276999548, 0.8844243792325056, 0.8845286423094272, 0.8846327174402884, 0.884736605132823, 0.8848403058929375, 0.8849438202247191, 0.8850471486304445, 0.8851502916105877, 0.8852532496638279, 0.8853560232870578, 0.8854586129753915, 0.8855610192221726, 0.8856632425189817, 0.8857652833556448, 0.8858671422202408, 0.8859688195991091, 0.886070315976858, 0.8861716318363717, 0.8862727676588184, 0.8863737239236573, 0.8864745011086475, 0.8865750996898538, 0.8866755201416556, 0.8867757629367536, 0.8868758285461776, 0.8869757174392936, 0.8870754300838112, 0.8871749669457911, 0.8872743284896522, 0.8873735151781786, 0.8874725274725275, 0.8875713658322354, 0.887670030715226, 0.8877685225778168, 0.8878668418747262, 0.887964989059081, 0.8880629645824224, 0.8881607688947138, 0.8882584024443474, 0.888355865678151, 0.8884531590413943, 0.8885502829777971, 0.8886472379295346, 0.8887440243372446, 0.8888406426400347}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/tanh_table.tb b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/tanh_table.tb new file mode 100644 index 0000000000..01ba076319 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/activation_tables/tanh_table.tb @@ -0,0 +1 @@ +static const typename CONFIG_T::table_t tanh_table[1024] = {0.001953122516476924, 0.005859307945695478, 
0.009765314570983716, 0.013671023213971423, 0.017576314732656768, 0.02148107003594046, 0.02538517009814669, 0.02928849597352715, 0.03319092881074426, 0.03709234986733006, 0.040992640524116775, 0.04489168229963561, 0.048789356864479785, 0.0526855460556284, 0.05658013189072718, 0.060472996582322754, 0.06436402255204648, 0.06825309244474453, 0.07214008914255045, 0.07602489577889665, 0.07990739575246146, 0.08378747274104796, 0.08766501071539134, 0.09153989395289128, 0.09541200705126586, 0.09928123494212375, 0.1031474629044512, 0.10701057657801066, 0.11087046197664768, 0.1147270055015028, 0.11858009395412547, 0.12242961454948657, 0.1262754549288867, 0.13011750317275683, 0.13395564781334884, 0.1377897778473124, 0.14161978274815565, 0.14544555247858673, 0.14926697750273335, 0.15308394879823747, 0.15689635786822267, 0.1607040967531314, 0.16450705804242943, 0.1683051348861752, 0.17209822100645114, 0.1758862107086552, 0.1796689988926494, 0.18344648106376393, 0.1872185533436538, 0.1909851124810063, 0.19474605586209706, 0.1985012815211924, 0.20225068815079633, 0.20599417511173998, 0.20973164244311157, 0.2134629908720254, 0.21718812182322753, 0.2209069374285372, 0.2246193405361218, 0.22832523471960375, 0.23202452428699857, 0.23571711428948167, 0.23940291052998322, 0.24308181957160963, 0.24675374874589015, 0.25041860616084793, 0.25407630070889364, 0.25772674207454177, 0.26136984074194725, 0.2650055080022628, 0.26863365596081523, 0.27225419754410035, 0.2758670465065959, 0.2794721174373912, 0.2830693257666341, 0.2866585877717932, 0.29023982058373665, 0.29381294219262516, 0.2973778714536212, 0.30093452809241167, 0.30448283271054605, 0.3080227067905886, 0.3115540727010845, 0.3150768537013413, 0.3185909739460237, 0.32209635848956353, 0.3255929332903842, 0.32908062521494014, 0.33255936204157155, 0.33602907246417457, 0.33948968609568775, 0.34294113347139465, 0.34638334605204396, 0.34981625622678686, 0.35323979731593286, 0.35665390357352433, 0.3600585101897311, 0.36345355329306545, 
0.3668389699524182, 0.3702146981789175, 0.37358067692761043, 0.3769368460989694, 0.38028314654022305, 0.38361952004651423, 0.3869459093618851, 0.3902622581800912, 0.393568511145245, 0.39686461385229144, 0.40015051284731523, 0.4034261556276825, 0.40669149064201776, 0.40994646729001727, 0.4131910359221008, 0.4164251478389035, 0.4196487552906083, 0.42286181147612184, 0.4260642705420944, 0.42925608758178646, 0.4324372186337826, 0.4356076206805551, 0.43876725164687896, 0.44191607039810005, 0.44505403673825744, 0.44818111140806344, 0.45129725608274124, 0.4544024333697234, 0.4574966068062129, 0.4605797408566076, 0.46365180090979174, 0.46671275327629513, 0.4697625651853225, 0.47280120478165494, 0.4758286411224257, 0.4788448441737717, 0.48184978480736396, 0.48484343479681746, 0.4878257668139841, 0.49079675442512954, 0.49375637208699696, 0.49670459514275933, 0.4996413998178624, 0.5025667632157604, 0.5054806633135474, 0.5083830789574848, 0.5112739898584294, 0.5141533765871616, 0.5170212205696182, 0.51987750408203, 0.522722210245969, 0.5255553230233032, 0.5283768272110657, 0.5311867084362375, 0.5339849531504463, 0.5367715486245844, 0.5395464829433482, 0.5423097449996985, 0.5450613244892488, 0.5478012119045784, 0.5505293985294758, 0.553245876433114, 0.5559506384641584, 0.5586436782448114, 0.5613249901647942, 0.5639945693752683, 0.5666524117826994, 0.5692985140426654, 0.5719328735536093, 0.5745554884505422, 0.5771663575986942, 0.5797654805871192, 0.5823528577222526, 0.584928490021426, 0.5874923792063391, 0.5900445276964925, 0.5925849386025818, 0.5951136157198557, 0.5976305635214401, 0.6001357871516299, 0.6026292924191495, 0.6051110857903865, 0.607581174382596, 0.6100395659570823, 0.6124862689123564, 0.6149212922772712, 0.6173446457041394, 0.6197563394618308, 0.622156384428855, 0.6245447920864292, 0.626921574511533, 0.6292867443699518, 0.6316403149093114, 0.6339822999521033, 0.6363127138887049, 0.6386315716703941, 0.6409388888023603, 0.6432346813367147, 0.6455189658654995, 
0.647791759513698, 0.6500530799322478, 0.6523029452910583, 0.6545413742720324, 0.6567683860620969, 0.6589840003462389, 0.6611882373005535, 0.6633811175853009, 0.6655626623379767, 0.6677328931663947, 0.6698918321417847, 0.6720395017919056, 0.6741759250941762, 0.6763011254688233, 0.6784151267720495, 0.6805179532892217, 0.6826096297280798, 0.6846901812119696, 0.6867596332730981, 0.6888180118458135, 0.6908653432599117, 0.6929016542339679, 0.6949269718686973, 0.6969413236403429, 0.6989447373940932, 0.700937241337531, 0.7029188640341113, 0.7048896343966732, 0.7068495816809831, 0.7087987354793126, 0.7107371257140495, 0.7126647826313447, 0.714581736794795, 0.7164880190791617, 0.7183836606641271, 0.7202686930280879, 0.7221431479419888, 0.7240070574631929, 0.725860453929394, 0.7277033699525678, 0.7295358384129643, 0.7313578924531419, 0.7331695654720433, 0.7349708911191131, 0.7367619032884597, 0.7385426361130593, 0.7403131239590041, 0.7420734014197947, 0.7438235033106778, 0.7455634646630276, 0.7472933207187733, 0.7490131069248731, 0.7507228589278325, 0.7524226125682707, 0.7541124038755324, 0.7557922690623472, 0.7574622445195368, 0.7591223668107678, 0.7607726726673547, 0.7624131989831084, 0.7640439828092348, 0.7656650613492801, 0.7672764719541264, 0.7688782521170336, 0.7704704394687326, 0.7720530717725652, 0.7736261869196748, 0.7751898229242445, 0.7767440179187864, 0.7782888101494783, 0.7798242379715503, 0.7813503398447221, 0.7828671543286876, 0.7843747200786502, 0.7858730758409075, 0.7873622604484849, 0.7888423128168192, 0.7903132719394911, 0.7917751768840074, 0.7932280667876327, 0.7946719808532692, 0.7961069583453873, 0.7975330385860041, 0.7989502609507118, 0.8003586648647537, 0.8017582897991509, 0.8031491752668753, 0.8045313608190742, 0.8059048860413396, 0.8072697905500292, 0.8086261139886337, 0.8099738960241923, 0.8113131763437559, 0.8126439946508985, 0.8139663906622758, 0.8152804041042302, 0.8165860747094443, 0.8178834422136397, 0.8191725463523238, 0.8204534268575822, 
0.8217261234549168, 0.8229906758601315, 0.824247123776262, 0.8254955068905512, 0.8267358648714712, 0.8279682373657885, 0.8291926639956751, 0.8304091843558634, 0.8316178380108457, 0.8328186644921168, 0.8340117032954618, 0.8351969938782852, 0.8363745756569845, 0.837544488004366, 0.8387067702471035, 0.8398614616632383, 0.8410086014797218, 0.8421482288699994, 0.8432803829516362, 0.8444051027839821, 0.8455224273658795, 0.8466323956334096, 0.8477350464576805, 0.8488304186426526, 0.8499185509230057, 0.8509994819620438, 0.8520732503496384, 0.8531398946002118, 0.8541994531507558, 0.8552519643588911, 0.8562974665009613, 0.8573359977701661, 0.8583675962747301, 0.8593923000361077, 0.8604101469872251, 0.8614211749707569, 0.8624254217374391, 0.8634229249444159, 0.8644137221536221, 0.8653978508301998, 0.8663753483409481, 0.8673462519528076, 0.8683105988313778, 0.8692684260394673, 0.8702197705356761, 0.8711646691730119, 0.8721031586975362, 0.8730352757470431, 0.87396105684977, 0.8748805384231372, 0.8757937567725201, 0.8767007480900508, 0.8776015484534491, 0.8784961938248833, 0.8793847200498616, 0.8802671628561497, 0.8811435578527194, 0.8820139405287241, 0.8828783462525023, 0.8837368102706088, 0.8845893677068734, 0.8854360535614857, 0.8862769027101072, 0.8871119499030092, 0.8879412297642365, 0.8887647767907969, 0.8895826253518762, 0.8903948096880768, 0.8912013639106833, 0.8920023220009494, 0.8927977178094115, 0.8935875850552238, 0.8943719573255181, 0.8951508680747863, 0.8959243506242848, 0.8966924381614626, 0.8974551637394105, 0.8982125602763325, 0.8989646605550382, 0.8997114972224576, 0.900453102789175, 0.9011895096289853, 0.9019207499784688, 0.9026468559365879, 0.9033678594643014, 0.9040837923842003, 0.9047946863801616, 0.9055005729970204, 0.906201483640262, 0.9068974495757315, 0.907588501929361, 0.9082746716869157, 0.908955989693756, 0.9096324866546183, 0.9103041931334113, 0.9109711395530301, 0.9116333561951862, 0.9122908732002531, 0.9129437205671294, 0.9135919281531152, 
0.914235525673806, 0.9148745427030003, 0.9155090086726231, 0.9161389528726628, 0.9167644044511236, 0.9173853924139915, 0.918001945625214, 0.918614092806694, 0.9192218625382966, 0.9198252832578694, 0.9204243832612753, 0.9210191907024387, 0.9216097335934037, 0.9221960398044045, 0.9227781370639482, 0.923356052958909, 0.9239298149346346, 0.9244994502950633, 0.9250649862028528, 0.9256264496795197, 0.9261838676055898, 0.926737266720759, 0.9272866736240645, 0.9278321147740662, 0.9283736164890385, 0.9289112049471703, 0.9294449061867768, 0.9299747461065186, 0.9305007504656319, 0.9310229448841655, 0.9315413548432288, 0.9320560056852467, 0.9325669226142242, 0.9330741306960186, 0.9335776548586194, 0.9340775198924374, 0.9345737504506001, 0.9350663710492557, 0.9355554060678842, 0.9360408797496154, 0.9365228162015541, 0.9370012393951132, 0.9374761731663515, 0.9379476412163205, 0.9384156671114154, 0.9388802742837343, 0.9393414860314421, 0.9397993255191411, 0.9402538157782474, 0.9407049797073728, 0.9411528400727124, 0.9415974195084374, 0.9420387405170937, 0.9424768254700044, 0.9429116966076793, 0.9433433760402272, 0.9437718857477744, 0.9441972475808866, 0.9446194832609968, 0.9450386143808358, 0.9454546624048684, 0.9458676486697328, 0.9462775943846843, 0.9466845206320427, 0.9470884483676438, 0.9474893984212943, 0.94788739149723, 0.9482824481745777, 0.9486745889078202, 0.9490638340272649, 0.9494502037395147, 0.9498337181279428, 0.9502143971531694, 0.9505922606535419, 0.9509673283456176, 0.9513396198246488, 0.9517091545650702, 0.9520759519209894, 0.9524400311266785, 0.9528014112970693, 0.9531601114282494, 0.9535161503979608, 0.9538695469661004, 0.9542203197752223, 0.9545684873510417, 0.9549140681029412, 0.9552570803244771, 0.9555975421938886, 0.9559354717746081, 0.9562708870157726, 0.9566038057527362, 0.9569342457075841, 0.957262224489648, 0.9575877595960216, 0.957910868412078, 0.9582315682119872, 0.9585498761592351, 0.9588658093071432, 0.9591793845993878, 0.9594906188705219, 
0.9597995288464959, 0.960106131145179, 0.9604104422768823, 0.960712478644881, 0.9610122565459365, 0.961309792170821, 0.9616051016048396, 0.9618982008283541, 0.9621891057173066, 0.9624778320437432, 0.9627643954763366, 0.9630488115809109, 0.9633310958209634, 0.9636112635581888, 0.9638893300530013, 0.9641653104650574, 0.9644392198537781, 0.964711073178871, 0.9649808853008506, 0.9652486709815609, 0.9655144448846942, 0.965778221576312, 0.9660400155253633, 0.9662998411042045, 0.9665577125891157, 0.9668136441608192, 0.9670676499049949, 0.967319743812797, 0.9675699397813675, 0.9678182516143512, 0.9680646930224079, 0.9683092776237253, 0.9685520189445289, 0.968792930419593, 0.9690320253927495, 0.9692693171173956, 0.9695048187570008, 0.9697385433856126, 0.9699705039883609, 0.9702007134619618, 0.970429184615219, 0.970655930169525, 0.9708809627593611, 0.971104294932795, 0.9713259391519784, 0.971545907793642, 0.9717642131495902, 0.9719808674271937, 0.9721958827498814, 0.9724092711576295, 0.972621044607451, 0.9728312149738819, 0.9730397940494674, 0.9732467935452451, 0.9734522250912285, 0.9736561002368871, 0.9738584304516259, 0.9740592271252632, 0.9742585015685065, 0.9744562650134274, 0.9746525286139339, 0.9748473034462418, 0.9750406005093444, 0.9752324307254794, 0.9754228049405961, 0.9756117339248189, 0.9757992283729106, 0.9759852989047323, 0.9761699560657036, 0.9763532103272589, 0.9765350720873036, 0.976715551670668, 0.9768946593295584, 0.977072405244008, 0.9772487995223247, 0.977423852201538, 0.9775975732478434, 0.9777699725570449, 0.9779410599549964, 0.9781108451980403, 0.978279337973445, 0.9784465478998399, 0.978612484527649, 0.9787771573395223, 0.9789405757507652, 0.9791027491097665, 0.9792636866984237, 0.9794233977325676, 0.9795818913623835, 0.9797391766728313, 0.9798952626840638, 0.9800501583518427, 0.9802038725679527, 0.9803564141606139, 0.9805077918948919, 0.9806580144731061, 0.9808070905352362, 0.9809550286593267, 0.9811018373618892, 0.9812475250983027, 
0.9813921002632128, 0.9815355711909275, 0.9816779461558124, 0.9818192333726831, 0.9819594409971957, 0.982098577126236, 0.9822366497983059, 0.9823736669939083, 0.9825096366359297, 0.9826445665900215, 0.9827784646649785, 0.9829113386131162, 0.9830431961306452, 0.9831740448580446, 0.9833038923804335, 0.983432746227939, 0.9835606138760643, 0.9836875027460532, 0.9838134202052539, 0.9839383735674799, 0.9840623700933695, 0.9841854169907429, 0.9843075214149583, 0.9844286904692644, 0.9845489312051531, 0.9846682506227081, 0.9847866556709535, 0.9849041532481988, 0.9850207502023832, 0.9851364533314176, 0.9852512693835244, 0.9853652050575754, 0.9854782670034288, 0.9855904618222625, 0.9857017960669073, 0.9858122762421769, 0.985921908805197, 0.9860307001657314, 0.9861386566865075, 0.9862457846835387, 0.9863520904264462, 0.9864575801387776, 0.9865622599983247, 0.9866661361374387, 0.9867692146433442, 0.9868715015584509, 0.9869730028806637, 0.9870737245636906, 0.9871736725173493, 0.987272852607872, 0.9873712706582073, 0.9874689324483219, 0.9875658437154993, 0.9876620101546372, 0.9877574374185429, 0.9878521311182276, 0.9879460968231971, 0.9880393400617435, 0.9881318663212327, 0.988223681048391, 0.9883147896495903, 0.9884051974911316, 0.9884949098995256, 0.9885839321617731, 0.9886722695256424, 0.988759927199946, 0.9888469103548151, 0.9889332241219719, 0.9890188735950015, 0.9891038638296208, 0.989188199843946, 0.9892718866187598, 0.9893549290977744, 0.9894373321878954, 0.9895191007594821, 0.9896002396466073, 0.9896807536473149, 0.9897606475238764, 0.9898399260030449, 0.9899185937763088, 0.9899966555001419, 0.9900741157962544, 0.9901509792518399, 0.990227250419823, 0.9903029338191027, 0.9903780339347975, 0.9904525552184857, 0.9905265020884466, 0.9905998789298984, 0.990672690095236, 0.9907449399042657, 0.9908166326444404, 0.990887772571091, 0.9909583639076579, 0.9910284108459206, 0.9910979175462254, 0.991166888137712, 0.9912353267185385, 0.9913032373561048, 0.9913706240872746, 
0.9914374909185966, 0.9915038418265225, 0.9915696807576257, 0.9916350116288171, 0.9916998383275596, 0.991764164712082, 0.9918279946115905, 0.9918913318264799, 0.9919541801285418, 0.9920165432611726, 0.9920784249395805, 0.9921398288509897, 0.9922007586548443, 0.9922612179830101, 0.9923212104399763, 0.9923807396030542, 0.9924398090225754, 0.9924984222220892, 0.9925565826985572, 0.9926142939225484, 0.9926715593384313, 0.9927283823645657, 0.992784766393493, 0.9928407147921248, 0.992896230901931, 0.9929513180391258, 0.993005979494853, 0.9930602185353695, 0.9931140384022283, 0.9931674423124596, 0.9932204334587506, 0.9932730150096246, 0.993325190109619, 0.9933769618794607, 0.9934283334162421, 0.9934793077935947, 0.9935298880618619, 0.9935800772482705, 0.993629878357101, 0.9936792943698574, 0.993728328245434, 0.9937769829202834, 0.9938252613085817, 0.9938731663023931, 0.993920700771833, 0.9939678675652309, 0.9940146695092907, 0.9940611094092515, 0.9941071900490459, 0.9941529141914579, 0.9941982845782797, 0.9942433039304672, 0.9942879749482945, 0.9943323003115067, 0.9943762826794733, 0.9944199246913388, 0.9944632289661725, 0.9945061981031184, 0.9945488346815429, 0.9945911412611814, 0.9946331203822851, 0.9946747745657654, 0.9947161063133377, 0.9947571181076649, 0.9947978124124984, 0.9948381916728202, 0.9948782583149814, 0.994918014746842, 0.9949574633579087, 0.9949966065194714, 0.9950354465847396, 0.9950739858889769, 0.9951122267496357, 0.9951501714664894, 0.9951878223217654, 0.9952251815802753, 0.9952622514895458, 0.995299034279948, 0.9953355321648253, 0.9953717473406213, 0.9954076819870058, 0.9954433382670013, 0.9954787183271067, 0.9955138242974216, 0.9955486582917695, 0.9955832224078195, 0.9956175187272073, 0.9956515493156558, 0.9956853162230946, 0.9957188214837784, 0.9957520671164045, 0.9957850551242301, 0.9958177874951878, 0.9958502662020015, 0.9958824932022998, 0.9959144704387304, 0.9959461998390723, 0.9959776833163476, 0.996008922768933, 0.9960399200806695, 
0.9960706771209725, 0.9961011957449396, 0.9961314777934593, 0.9961615250933178, 0.9961913394573049, 0.9962209226843203, 0.9962502765594775, 0.9962794028542086, 0.9963083033263669, 0.9963369797203299, 0.9963654337671003, 0.9963936671844081, 0.9964216816768098, 0.9964494789357883, 0.996477060639852, 0.9965044284546325, 0.9965315840329818, 0.9965585290150696, 0.9965852650284783, 0.9966117936882992, 0.9966381165972262, 0.9966642353456502, 0.9966901515117519, 0.9967158666615943, 0.9967413823492146, 0.9967667001167154, 0.9967918214943546, 0.9968167480006359, 0.9968414811423973, 0.9968660224148999, 0.9968903733019153, 0.9969145352758135, 0.9969385097976483, 0.9969622983172443, 0.9969859022732812, 0.9970093230933791, 0.9970325621941818, 0.997055620981441, 0.997078500850098, 0.9971012031843667, 0.9971237293578148, 0.9971460807334446, 0.9971682586637732, 0.9971902644909129, 0.9972120995466491, 0.9972337651525205, 0.9972552626198952, 0.9972765932500496, 0.9972977583342442, 0.9973187591538006, 0.9973395969801765, 0.997360273075041, 0.9973807886903495, 0.9974011450684173, 0.9974213434419927, 0.9974413850343311, 0.997461271059266, 0.9974810027212813, 0.9975005812155828, 0.9975200077281691, 0.9975392834359007, 0.9975584095065713, 0.9975773870989753, 0.9975962173629779, 0.9976149014395823, 0.9976334404609978, 0.9976518355507064, 0.9976700878235301, 0.9976881983856964, 0.9977061683349047, 0.9977239987603903, 0.9977416907429901, 0.9977592453552061, 0.9977766636612695, 0.9977939467172037, 0.9978110955708869, 0.9978281112621146, 0.9978449948226613, 0.9978617472763419, 0.9978783696390724, 0.9978948629189305, 0.9979112281162151, 0.9979274662235067, 0.9979435782257255, 0.9979595651001909, 0.9979754278166785, 0.9979911673374792, 0.9980067846174554, 0.9980222806040987, 0.9980376562375857, 0.9980529124508342, 0.9980680501695591, 0.9980830703123275, 0.9980979737906126, 0.9981127615088494, 0.9981274343644879, 0.9981419932480466, 0.9981564390431656, 0.9981707726266598, 0.9981849948685706, 
0.9981991066322184, 0.9982131087742537, 0.9982270021447085, 0.9982407875870469, 0.9982544659382157, 0.9982680380286942, 0.998281504682544, 0.9982948667174579, 0.9983081249448094, 0.9983212801697003, 0.9983343331910101, 0.9983472848014422, 0.9983601357875729, 0.9983728869298971, 0.998385539002876, 0.9983980927749829, 0.9984105490087493, 0.9984229084608107, 0.9984351718819516, 0.9984473400171507, 0.9984594136056256, 0.9984713933808768, 0.9984832800707316, 0.998495074397388, 0.9985067770774579, 0.9985183888220099, 0.9985299103366121, 0.998541342321374, 0.9985526854709891, 0.9985639404747761, 0.9985751080167204, 0.9985861887755151, 0.9985971834246016, 0.9986080926322101, 0.9986189170613998, 0.9986296573700987, 0.9986403142111427, 0.9986508882323154, 0.9986613800763864, 0.9986717903811502, 0.9986821197794645, 0.9986923688992884, 0.9987025383637192, 0.9987126287910315, 0.9987226407947125, 0.9987325749835002, 0.9987424319614193, 0.9987522123278179, 0.9987619166774031, 0.9987715456002771, 0.9987810996819728, 0.9987905795034885, 0.9987999856413231, 0.998809318667511, 0.9988185791496562, 0.9988277676509666, 0.9988368847302878, 0.9988459309421367, 0.9988549068367351, 0.9988638129600426, 0.9988726498537893, 0.9988814180555091, 0.9988901180985712, 0.9988987505122128, 0.9989073158215706, 0.9989158145477126, 0.99892424720767, 0.9989326143144671, 0.9989409163771535, 0.9989491539008339, 0.9989573273866991, 0.9989654373320556, 0.9989734842303563, 0.9989814685712296, 0.9989893908405094, 0.998997251520264, 0.9990050510888254, 0.9990127900208181, 0.9990204687871878, 0.9990280878552293, 0.9990356476886157, 0.9990431487474253, 0.9990505914881699, 0.9990579763638224, 0.9990653038238438, 0.9990725743142107, 0.9990797882774419, 0.9990869461526253, 0.9990940483754441, 0.9991010953782038, 0.9991080875898575, 0.9991150254360319, 0.9991219093390539, 0.9991287397179747, 0.9991355169885964, 0.9991422415634963, 0.999148913852052, 0.9991555342604663, 0.9991621031917918, 0.9991686210459546, 
0.9991750882197795, 0.9991815051070132, 0.9991878720983481, 0.9991941895814466, 0.9992004579409636, 0.9992066775585705, 0.9992128488129778, 0.9992189720799582, 0.9992250477323696, 0.999231076140177, 0.9992370576704752, 0.9992429926875113, 0.999248881552706, 0.9992547246246761, 0.9992605222592563, 0.9992662748095199, 0.999271982625801, 0.9992776460557151, 0.9992832654441806, 0.9992888411334393, 0.9992943734630775, 0.999299862770046, 0.999305309388681, 0.9993107136507242, 0.9993160758853425, 0.9993213964191487, 0.9993266755762206}; diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation.h new file mode 100644 index 0000000000..ab1874ec10 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation.h @@ -0,0 +1,499 @@ +#ifndef NNET_ACTIVATION_H_ +#define NNET_ACTIVATION_H_ + +#include "nnet_common.h" + +namespace nnet { + +struct activ_config { + // IO size + static constexpr unsigned n_in = 10; + + // Internal info + static constexpr unsigned table_size = 512; + + // Resource reuse info + static constexpr unsigned io_type = io_parallel; + static constexpr unsigned reuse_factor = 1; + + // Internal data type definitions + typedef ac_fixed<16, 8> table_t; +}; + +// ************************************************* +// LINEAR Activation -- See Issue 53 +// ************************************************* +template void linear(const data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + res[ii] = datareg; + } +} + +// ************************************************* +// RELU Activation +// ************************************************* +template void relu(const data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg > 0) + res[ii] = datareg; + else + res[ii] = 0; + } +} + +template void relu_max(const 
data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg < 0) + res[ii] = 0; + else if (datareg > MAX_INT) + res[ii] = MAX_INT; + else + res[ii] = datareg; + } +} + +template void relu6(const data_T &data, res_T &res) { + relu_max(data, res); +} + +template void relu1(const data_T &data, res_T &res) { + relu_max(data, res); +} + +// ************************************************* +// Sigmoid Activation +// ************************************************* +template void sigmoid(const data_T &data, res_T &res) { + static constexpr int MAX_VALUE = 8; +#include "activation_tables/sigmoid_table.tb" + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + [[intel::fpga_register]] typename data_T::value_type absoluteValue; + [[intel::fpga_register]] typename res_T::value_type temp2; + if (data[ii] < 0) { + absoluteValue = -data[ii]; + } else { + absoluteValue = data[ii]; + } + int index = (absoluteValue * (CONFIG_T::table_size / MAX_VALUE)).to_int(); + if (absoluteValue > MAX_VALUE) + index = CONFIG_T::table_size - 1; + temp2 = static_cast(sigmoid_table[index]); + if (data[ii] < 0) { + res[ii] = 1 - temp2; + } else { + res[ii] = temp2; + } + } +} + +// ************************************************* +// Softmax Activation +// ************************************************* + +enum class softmax_implementation { latency = 0, legacy = 1, stable = 2, argmax = 3 }; + +template inline unsigned softmax_stable_idx_from_real_val(const data_T x) { + // Number of address bits for table + static constexpr int N = ceillog2::val; + + // Slice the top N bits of the input + [[intel::fpga_register]] ac_int y = x.template slc(x.width - N - 1); + // If x is the most negative value, the slice will be 0, so we need to set the 0-th bit to ensure correctness + if (x != 0 && y == 0) + y[0] = 1; + return y.to_uint(); +} + +template inline unsigned softmax_latency_idx_from_real_val(const data_T x) { + // 
Number of address bits for table + static constexpr int N = ceillog2::val; + + // Slice the top N bits of the input + [[intel::fpga_register]] ac_int y = x.template slc(x.width - N); + return y.to_uint(); +} + +template void softmax_stable(const data_T &data, res_T &res) { +// Look-up tables +#include "activation_tables/exp_table.tb" +#include "activation_tables/invert_table.tb" + + // Find maximum + Op_max op_max; + [[intel::fpga_register]] auto x_max = + reduce>(data.data(), op_max); + + // For the diffs, use the same type as the input but force rounding and saturation + [[intel::fpga_register]] ac_fixed + d_xi_xmax[CONFIG_T::n_in]; + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + d_xi_xmax[i] = data[i] - x_max; + } + + // Calculate all the e^x's + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in]; + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + exp_res[i] = exp_table[softmax_stable_idx_from_real_val(d_xi_xmax[i])]; + } + + // Explicitly sum previously calculated exponentials with an adder tree + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = + reduce>(exp_res, op_add); + + // Multiply previously calculated exponetials with the reciprocal of the sum + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = + invert_table[softmax_stable_idx_from_real_val(exp_sum)]; + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + res[i] = exp_res[i] * inv_exp_sum; + } +} + +// TODO - Improve accuracy +template void softmax_latency(const data_T &data, res_T &res) { +#include "activation_tables/exp_table_latency.tb" +#include "activation_tables/invert_table_latency.tb" + + // Calculate all the e^x's + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in]; + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + exp_res[i] = exp_table_latency[softmax_latency_idx_from_real_val(data[i])]; + } + + // Explicitly 
sum the results with an adder tree. + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = + reduce>(exp_res, op_add); + + // Multiply previously calculated exponetials with the reciprocal of the sum + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = + invert_table_latency[softmax_latency_idx_from_real_val(exp_sum)]; + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + res[i] = exp_res[i] * inv_exp_sum; + } +} + +template void softmax_legacy(const data_T &data, res_T &res) { +#include "activation_tables/exp_table_legacy.tb" +#include "activation_tables/invert_table_legacy.tb" + + [[intel::fpga_register]] int data_round[CONFIG_T::n_in]; +New_loop: + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + data_round[ii] = (data[ii] * CONFIG_T::table_size / 16).to_int(); + } +NN_Outer: + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + typename CONFIG_T::exp_table_t exp_res_temp = 0; + NN_Inner: + #pragma unroll + for (int jj = 0; jj < CONFIG_T::n_in; jj++) { + if (ii == jj) { + exp_res_temp += 1; + } else { + int _data_cache = (data_round[jj] - data_round[ii]); + int index = _data_cache + 8 * CONFIG_T::table_size / 16; + + if (index < 0) + index = 0; + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + + typename CONFIG_T::exp_table_t temp_exp = exp_table_legacy[index]; + exp_res_temp += temp_exp; + } + } + int exp_res_index = (exp_res_temp * CONFIG_T::table_size / 64).to_int(); + if (exp_res_index < 0) + exp_res_index = 0; + if (exp_res_index > CONFIG_T::table_size - 1) + exp_res_index = CONFIG_T::table_size - 1; + res[ii] = invert_table_legacy[exp_res_index]; + } +} + +template void softmax_argmax(const data_T &data, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_in; i++) { + res[i] = static_cast(0); + } + + [[intel::fpga_register]] auto maximum = data[0]; + [[intel::fpga_register]] int idx = 0; + + [[intel::initiation_interval(1)]] 
for (int i = 1; i < CONFIG_T::n_in; i++) { + if (data[i] > maximum) { + maximum = data[i]; + idx = i; + } + } + + res[idx] = static_cast(1); +} + +template inline void softmax(const data_T &data, res_T &res) { + switch (CONFIG_T::implementation) { + case softmax_implementation::stable: + softmax_stable(data, res); + break; + case softmax_implementation::latency: + softmax_latency(data, res); + break; + case softmax_implementation::legacy: + softmax_legacy(data, res); + break; + default: + softmax_stable(data, res); + break; + case softmax_implementation::argmax: + softmax_argmax(data, res); + break; + } +} + +// ************************************************* +// TanH Activation +// ************************************************* +template void dense_tanh(const data_T &data, res_T &res) { + static constexpr int MAX_VALUE = 4; +// Initialize the lookup table +#include "activation_tables/tanh_table.tb" + // Index into the lookup table based on data + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + [[intel::fpga_register]] typename data_T::value_type temp; + [[intel::fpga_register]] typename res_T::value_type temp2; + if (data[ii] < 0) { + temp = -data[ii]; + } else { + temp = data[ii]; + } + ac_int<16> index = (temp * (CONFIG_T::table_size / MAX_VALUE)).to_int(); + if (temp > MAX_VALUE) + index = CONFIG_T::table_size - 1; + temp2 = static_cast(tanh_table[index]); + if (data[ii] < 0) { + res[ii] = -temp2; + } else { + res[ii] = temp2; + } + } +} + +// ************************************************* +// Hard sigmoid Activation +// ************************************************* +template void hard_sigmoid(const data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = CONFIG_T::slope * data[ii] + CONFIG_T::shift; + if (datareg > 1) + datareg = 1; + else if (datareg < 0) + datareg = 0; + res[ii] = datareg; + } +} + +template void hard_tanh(const data_T &data, res_T &res) { + #pragma unroll + 
for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto sigmoid = CONFIG_T::slope * data[ii] + CONFIG_T::shift; + if (sigmoid > 1) + sigmoid = 1; + else if (sigmoid < 0) + sigmoid = 0; + res[ii] = 2 * sigmoid - 1; + } +} + +// ************************************************* +// Leaky RELU Activation +// ************************************************* +template +void leaky_relu(const data_T &data, const typename CONFIG_T::param_t alpha, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg > 0) + res[ii] = datareg; + else + res[ii] = alpha * datareg; + } +} + +// ************************************************* +// Thresholded RELU Activation +// ************************************************* +template +void thresholded_relu(const data_T &data, const typename CONFIG_T::param_t theta, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg > theta) + res[ii] = datareg; + else + res[ii] = 0; + } +} + +// ************************************************* +// Softplus Activation +// ************************************************* +template void softplus(const data_T &data, res_T &res) { +// Initialize the lookup table +#include "activation_tables/softplus_table.tb" + // Index into the lookup table based on data + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + ac_int<16> data_round = (data[ii] * CONFIG_T::table_size / 16).to_int(); + ac_int<16> index = data_round + 8 * CONFIG_T::table_size / 16; + if (index < 0) + index = 0; + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + res[ii] = static_cast(softplus_table[index]); + } +} + +// ************************************************* +// Softsign Activation +// ************************************************* +template void softsign(const data_T &data, res_T &res) { + static constexpr int MAX_VALUE = 8; +// Initialize the lookup table 
+#include "activation_tables/softsign_table.tb" + + // Index into the lookup table based on data + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + [[intel::fpga_register]] typename data_T::value_type temp; + [[intel::fpga_register]] typename res_T::value_type temp2; + if (data[ii] < 0) { + temp = -data[ii]; + } else { + temp = data[ii]; + } + ac_int<16> index = (temp * CONFIG_T::table_size / MAX_VALUE).to_int(); + if (temp >= MAX_VALUE) // '>=': temp == MAX_VALUE gives index == table_size, one past the last table entry + index = CONFIG_T::table_size - 1; + temp2 = static_cast(softsign_table[index]); + if (data[ii] < 0) { + res[ii] = -temp2; + } else { + res[ii] = temp2; + } + } +} + +// ************************************************* +// ELU Activation +// ************************************************* +template +void elu(const data_T &data, const typename CONFIG_T::param_t alpha, res_T &res) { +// Initialize the lookup table +#include "activation_tables/elu_table.tb" + // Index into the lookup table based on data + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg >= 0) { + res[ii] = datareg; + } else { + ac_int<16> index = (datareg * CONFIG_T::table_size / -8).to_int(); + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + res[ii] = alpha * elu_table[index]; + } + } +} + +// ************************************************* +// SELU Activation +// ************************************************* +template void selu(const data_T &data, res_T &res) { +// Initialize the lookup table +#include "activation_tables/selu_table.tb" + // Index into the lookup table based on data + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg >= 0) { + res[ii] = static_cast(1.0507009873554804934193349852946) * datareg; + } else { + ac_int<16> index = (datareg * CONFIG_T::table_size / -8).to_int(); + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + res[ii] = selu_table[index]; + } + } +} + 
+// ************************************************* +// PReLU Activation +// ************************************************* +template +void prelu(const data_T &data, const typename CONFIG_T::param_t &alpha, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + if (datareg > 0) + res[ii] = datareg; + else + res[ii] = alpha[ii] * datareg; + } +} + +// ************************************************* +// Binary TanH Activation +// ************************************************* +template void binary_tanh(const data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = data[ii]; + typename res_T::value_type cache; + if (datareg > 0) + cache = 1; + else + cache = -1; + + res[ii] = cache; + } +} + +// ************************************************* +// Ternary TanH Activation +// ************************************************* +template void ternary_tanh(const data_T &data, res_T &res) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + auto datareg = 2 * data[ii]; + typename res_T::value_type cache; + if (datareg > 1) + cache = 1; + else if (datareg > -1 && datareg <= 1) + cache = 0; + else + cache = -1; + + res[ii] = cache; + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation_stream.h new file mode 100644 index 0000000000..13de5ab3bb --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_activation_stream.h @@ -0,0 +1,712 @@ +#ifndef NNET_ACTIVATION_STREAM_H_ +#define NNET_ACTIVATION_STREAM_H_ + +#include "nnet_common.h" +#include "nnet_types.h" + +namespace nnet { + +// ************************************************* +// Linear Activation +// ************************************************* +template void linear_stream() { +LinearActLoop: + 
[[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + LinearPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + out_data[j] = in_data[j]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// ReLU Activation +// ************************************************* +template void relu_stream() { +ReLUActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + ReLUPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > 0) + out_data[j] = in_data[j]; + else + out_data[j] = 0; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Leaky RELU Activation +// ************************************************* +template void leaky_relu_stream(typename CONFIG_T::param_t alpha) { + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +LeakyReLUActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + LeakyReLUPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > 0) + out_data[j] = in_data[j]; + else + out_data[j] = alpha * in_data[j]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Thresholded RELU Activation +// ************************************************* +template +void 
thresholded_relu_stream(typename CONFIG_T::param_t theta) { +ThresholdedReLUActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + ThresholdedReLUPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > theta) + out_data[j] = in_data[j]; + else + out_data[j] = 0; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// ELU Activation +// ************************************************* +template void elu_stream(typename CONFIG_T::param_t alpha) { +#include "activation_tables/elu_table.tb" + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +EluActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + EluPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type datareg = in_data[j]; + if (datareg >= 0) { + out_data[j] = datareg; + } else { + int index = (datareg * CONFIG_T::table_size / -8).to_int(); + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + out_data[j] = alpha * elu_table[index]; + } + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// SeLU Activation +// ************************************************* +template void selu_stream() { +#include "activation_tables/selu_table.tb" + +SeluActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = 
data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + SeluPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type datareg = in_data[j]; + if (datareg >= 0) { + out_data[j] = + typename ExtractPipeType::value_type::value_type(1.0507009873554804934193349852946) * datareg; + } else { + int index = (datareg * CONFIG_T::table_size / -8).to_int(); + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + out_data[j] = selu_table[index]; + } + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// PReLU Activation +// ************************************************* +template void prelu_stream(typename CONFIG_T::param_t alpha) { + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +PReLUActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + PReLUPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > 0) + out_data[j] = in_data[j]; + else + out_data[j] = alpha[i * std::tuple_size::value_type>{} + j] * in_data[j]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Softplus Activation +// ************************************************* +template void softplus_stream() { +#include "activation_tables/softplus_table.tb" + +SoftplusActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + SoftplusPackLoop: + #pragma unroll + for 
(int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] int data_round = (in_data[j] * CONFIG_T::table_size / 16).to_int(); + [[intel::fpga_register]] int index = data_round + 8 * CONFIG_T::table_size / 16; + if (index < 0) + index = 0; + else if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + out_data[j] = softplus_table[index]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Softsign Activation +// ************************************************* +template void softsign_stream() { +#include "activation_tables/softsign_table.tb" + + static const int MAX_VALUE = 8; + +SoftsignActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + SoftsignPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type absValue; + ; + if (in_data[j] < 0) { + absValue = -in_data[j]; + } else { + absValue = in_data[j]; + } + ac_int<16> index = (absValue * CONFIG_T::table_size / MAX_VALUE).to_int(); + if (absValue >= MAX_VALUE) // '>=': absValue == MAX_VALUE gives index == table_size, one past the last table entry + index = CONFIG_T::table_size - 1; + if (in_data[j] < 0) { + out_data[j] = + static_cast::value_type::value_type>(-softsign_table[index]); + } else { + out_data[j] = static_cast::value_type::value_type>(softsign_table[index]); + } + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Softmax Activation +// ************************************************* + +template void softmax_stable_stream() { +#include "activation_tables/exp_table.tb" +#include "activation_tables/invert_table.tb" + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / 
multiplier_limit; + + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type + data_array[std::tuple_size::value_type>{}]; + +SoftmaxArrayLoop: + [[intel::initiation_interval(pipeline)]] for (unsigned i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_pack = data_pipe::read(); + + SoftmaxArrayPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + data_array[j] = in_pack[j]; + } + + // Find the max and compute all delta(x_i, x_max) + Op_max::value_type::value_type> op_max; + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type x_max = + reduce::value_type::value_type, + std::tuple_size::value_type>{}, + Op_max::value_type::value_type>>(data_array, op_max); + + // For the diffs, use the same type as the input but force rounding and saturation + [[intel::fpga_register]] ac_fixed::value_type::value_type::width, + ExtractPipeType::value_type::value_type::i_width, true, AC_RND, AC_SAT> + d_xi_xmax[std::tuple_size::value_type>{}]; + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + d_xi_xmax[j] = data_array[j] - x_max; + } + + // Calculate all the e^x's + [[intel::fpga_register]] + typename CONFIG_T::exp_table_t exp_res[std::tuple_size::value_type>{}]; + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + exp_res[j] = + exp_table[softmax_stable_idx_from_real_val::value_type::value_type, + CONFIG_T>(d_xi_xmax[j])]; + } + + // Explicitly sum the results with an adder tree. 
+ // Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = + reduce::value_type>{}, + Op_add>(exp_res, op_add); + + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = + invert_table[softmax_stable_idx_from_real_val(exp_sum)]; + typename ExtractPipeType::value_type out_pack; + + SoftmaxInvPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + + // TODO - Find Quartus-equivalent pragma + // #pragma HLS ALLOCATION instances=mul limit=multiplier_limit operation + + out_pack[j] = exp_res[j] * inv_exp_sum; + } + + res_pipe::write(out_pack); + } +} + +template void softmax_latency_stream() { +#include "activation_tables/exp_table_latency.tb" +#include "activation_tables/invert_table_latency.tb" + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + + // Calculate all the e^x's + [[intel::fpga_register]] + typename CONFIG_T::exp_table_t exp_res[std::tuple_size::value_type>{}]; + +SoftmaxExpLoop: + [[intel::initiation_interval(pipeline)]] for (unsigned i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_pack = data_pipe::read(); + + SoftmaxExpPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + exp_res[j] = exp_table_latency[softmax_latency_idx_from_real_val< + typename ExtractPipeType::value_type::value_type, CONFIG_T>(in_pack[j])]; + } + + // Explicitly sum the results with an adder tree. 
+ // Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = + reduce>(exp_res, op_add); + + // Multiply previously calculated exponetials with the reciprocal of the sum + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = + invert_table_latency[softmax_latency_idx_from_real_val(exp_sum)]; + + typename ExtractPipeType::value_type out_pack; + SoftmaxInvPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + // #pragma HLS ALLOCATION instances=mul limit=multiplier_limit operation + out_pack[j] = exp_res[j] * inv_exp_sum; + } + + res_pipe::write(out_pack); + } +} + +template void softmax_legacy_stream() { +#include "activation_tables/exp_table_legacy.tb" +#include "activation_tables/invert_table_legacy.tb" + + // Index into the lookup table based on data for exponentials + [[intel::fpga_register]] + typename CONFIG_T::table_t exp_res[std::tuple_size::value_type>{}]; + [[intel::fpga_register]] typename CONFIG_T::table_t exp_diff_res; + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type + data_cache[std::tuple_size::value_type>{}]; + +SoftmaxInitLoop: + [[intel::initiation_interval(1)]] for (unsigned s = 0; + s < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + s++) { + auto in_pack = data_pipe::read(); + + SoftmaxInitPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + data_cache[j] = in_pack[j]; + exp_res[j] = 0; + } + + SoftmaxExpLoop: + #pragma unroll + for (int i = 0; i < std::tuple_size::value_type>{}; i++) { + SoftmaxExpInner: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (i == j) { + exp_diff_res = 1; + } else { + int data_round = ((data_cache[j] - data_cache[i]) * CONFIG_T::table_size / 16).to_int(); + int index = data_round + 8 * CONFIG_T::table_size / 16; + if (index < 0) + 
index = 0; + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + exp_diff_res = exp_table_legacy[index]; + } + exp_res[i] += exp_diff_res; + } + } + + typename ExtractPipeType::value_type out_pack; + SoftmaxInvPackLoop: + #pragma unroll + for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + int exp_res_index = (exp_res[j] * CONFIG_T::table_size / 64).to_int(); + if (exp_res_index < 0) + exp_res_index = 0; + if (exp_res_index > CONFIG_T::table_size - 1) + exp_res_index = CONFIG_T::table_size - 1; + out_pack[j] = + static_cast::value_type::value_type>(invert_table_legacy[exp_res_index]); + } + + res_pipe::write(out_pack); + } +} + +template void softmax_argmax_stream() { + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + #pragma unroll + for (int i = 0; i < std::tuple_size::value_type>{}; i++) { + out_data[i] = static_cast::value_type::value_type>(0); + } + + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type maximum = in_data[0]; + [[intel::fpga_register]] int idx = 0; + + [[intel::initiation_interval(1)]] for (int i = 1; + i < std::tuple_size::value_type>{}; i++) { + if (in_data[i] > maximum) { + maximum = in_data[i]; + idx = i; + } + } + + out_data[idx] = static_cast::value_type::value_type>(1); + res_pipe::write(out_data); + } +} + +template void softmax_stream() { + switch (CONFIG_T::implementation) { + case softmax_implementation::latency: + softmax_latency_stream(); + break; + case softmax_implementation::stable: + softmax_stable_stream(); + break; + case softmax_implementation::legacy: + softmax_legacy_stream(); + break; + case softmax_implementation::argmax: + softmax_argmax_stream(); + break; + default: + softmax_stable_stream(); + break; + } +} + +// ************************************************* +// TanH Activation +// 
************************************************* +template void dense_tanh_stream() { +#include "activation_tables/tanh_table.tb" + static const int MAX_VALUE = 4; + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +TanHActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + TanHPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type absoluteValue; + + if (in_data[j] < 0) + absoluteValue = (-1) * in_data[j]; + else + absoluteValue = in_data[j]; + + [[intel::fpga_register]] int index; + if (absoluteValue < MAX_VALUE) // '<': absoluteValue == MAX_VALUE can give index == table_size, one past the last table entry + index = (absoluteValue * (CONFIG_T::table_size / MAX_VALUE)).to_int(); + else + index = CONFIG_T::table_size - 1; + + if (in_data[j] > 0) + out_data[j] = tanh_table[index]; + else + out_data[j] = -tanh_table[index]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Sigmoid Activation +// ************************************************* +template void sigmoid_stream() { +#include "activation_tables/sigmoid_table.tb" + static const int MAX_VALUE = 8; + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +SigmoidActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + SigmoidPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + 
[[intel::fpga_register]] typename ExtractPipeType::value_type::value_type absoluteValue; + + if (in_data[j] < 0) + absoluteValue = (-1) * in_data[j]; + else + absoluteValue = in_data[j]; + + [[intel::fpga_register]] int index; + if (absoluteValue < MAX_VALUE) // '<': absoluteValue == MAX_VALUE can give index == table_size, one past the last table entry + index = (absoluteValue * (CONFIG_T::table_size / MAX_VALUE)).to_int(); + else + index = CONFIG_T::table_size - 1; + + if (in_data[j] > 0) + out_data[j] = sigmoid_table[index]; + else + out_data[j] = 1 - sigmoid_table[index]; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Hard sigmoid Activation +// ************************************************* +// Note - Theano and Tensorflow might have different definitions for hard sigmoid; could provide two implementations +template void hard_sigmoid_stream() { + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +HardSigmoidActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + HardSigmoidPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + [[intel::fpga_register]] auto datareg = CONFIG_T::slope * in_data[j] + CONFIG_T::shift; + if (datareg > 1) + datareg = 1; + else if (datareg < 0) + datareg = 0; + out_data[j] = datareg; + } + + res_pipe::write(out_data); + } +} + +template void hard_tanh_stream() { + + constexpr unsigned multiplier_limit = + DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + +HardSigmoidActLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; + i < CONFIG_T::n_in / + std::tuple_size::value_type>{}; + i++) { + + auto in_data = 
data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + HardSigmoidPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + auto sigmoid = CONFIG_T::slope * in_data[j] + CONFIG_T::shift; + if (sigmoid > 1) + sigmoid = 1; + else if (sigmoid < 0) + sigmoid = 0; + out_data[j] = 2 * sigmoid - 1; + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Binary TanH Activation +// ************************************************* +template void binary_tanh_stream() { +BinaryTanHActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + + [[intel::fpga_register]] auto in_data = data_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + BinaryTanHPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > 0) + out_data[j] = static_cast::value_type::value_type>(1); + else + out_data[j] = static_cast::value_type::value_type>(-1); + } + + res_pipe::write(out_data); + } +} + +// ************************************************* +// Ternary TanH Activation +// ************************************************* +template void ternary_tanh_stream() { +TernaryTanHActLoop: + [[intel::initiation_interval( + 1)]] for (int i = 0; i < CONFIG_T::n_in / std::tuple_size::value_type>{}; i++) { + + [[intel::fpga_register]] auto in_data = data_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + TernaryTanHPackLoop: + #pragma unroll + for (int j = 0; j < std::tuple_size::value_type>{}; j++) { + if (in_data[j] > 1) + out_data[j] = static_cast::value_type::value_type>(1); + else if (in_data[j] <= -1) + out_data[j] = static_cast::value_type::value_type>(-1); + else + out_data[j] = static_cast::value_type::value_type>(0); + } + + res_pipe::write(out_data); + } +} + +} // namespace nnet + +#endif diff --git 
a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm.h new file mode 100644 index 0000000000..f8e5bcb792 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm.h @@ -0,0 +1,104 @@ +#ifndef NNET_BATCHNORM_H_ +#define NNET_BATCHNORM_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" +#include "nnet_mult.h" + +namespace nnet { + +struct batchnorm_config { + // Internal data type definitions + typedef float bias_t; + typedef float scale_t; + + // Layer Sizes + static const unsigned n_in = 10; + static const unsigned n_filt = -1; + static const unsigned n_scale_bias = 10; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const bool store_weights_in_bram = false; + static const unsigned n_zeros = 0; + // partitioning arrays cyclically to go with roll factors? + + // Default multiplication + template using product = nnet::product::mult; +}; + +template +void normalize(const data_T &data, res_T &res, const typename CONFIG_T::scale_t &scale, + const typename CONFIG_T::bias_t &bias) { +// Calcuate result +Result: + #pragma unroll + for (int ires = 0; ires < CONFIG_T::n_in; ires++) { + if (CONFIG_T::n_filt == -1) { + res[ires] = + CONFIG_T::template product::product( + data[ires], scale[ires]) + + bias[ires]; + } else { + int norm_index = ires % CONFIG_T::n_filt; + res[ires] = + CONFIG_T::template product::product( + data[ires], scale[norm_index]) + + bias[norm_index]; + } + } +} + +// **************************************************** +// Merged Batch Normalization and Quantized Tanh +// **************************************************** +struct batchnorm_quantized_tanh_config { + // Layer Sizes + static const unsigned n_in = 10; + static const unsigned n_filt = -1; + static const unsigned n_scale_bias = 10; + + // Resource reuse info + static const unsigned io_type = 
io_parallel; + static const unsigned reuse_factor = 1; + static const unsigned n_zeros = 0; +}; + +template +void normalize_binary_tanh(const data_T &data, res_T &res, const typename CONFIG_T::threshold_t &threshold) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + ac_int<1, false> cache; + auto datareg = data[ii]; + int norm_index = CONFIG_T::n_filt == -1 ? ii : ii % CONFIG_T::n_filt; + if (datareg >= threshold[norm_index]) + cache = 1; + else + cache = 0; + + res[ii] = cache; + } +} + +template +void normalize_ternary_tanh(const data_T &data, res_T &res, const typename CONFIG_T::threshold_hi_t &threshold_hi, + const typename CONFIG_T::threshold_lo_t &threshold_lo) { + #pragma unroll + for (int ii = 0; ii < CONFIG_T::n_in; ii++) { + ac_int<2, true> cache; + auto datareg = data[ii]; + int norm_index = CONFIG_T::n_filt == -1 ? ii : ii % CONFIG_T::n_filt; + if (datareg > threshold_hi[norm_index]) + cache = 1; + else if (datareg <= threshold_lo[norm_index]) + cache = -1; + else + cache = 0; + res[ii] = cache; + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm_stream.h new file mode 100644 index 0000000000..128b3ac1a4 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_batchnorm_stream.h @@ -0,0 +1,107 @@ +#ifndef NNET_BATCHNORM_STREAM_H_ +#define NNET_BATCHNORM_STREAM_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" +#include "nnet_mult.h" +#include "nnet_types.h" + +namespace nnet { + +// **************************************************** +// Streaming Batch Normalization +// **************************************************** +template +void normalize_stream(typename CONFIG_T::scale_t scale, typename CONFIG_T::bias_t bias) { + + constexpr unsigned multiplier_limit = DIV_ROUNDUP(CONFIG_T::n_in, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = CONFIG_T::n_in / 
multiplier_limit; + constexpr auto datasize = std::tuple_size::value_type>{}; + CONFIG_T::template product::value_type::value_type, + typename CONFIG_T::scale_t::value_type>::limit(multiplier_limit); + +BatchNormLoop: + [[intel::initiation_interval(pipeline)]] for (int i = 0; i < CONFIG_T::n_in / datasize; i++) { + auto in_data = data_pipe::read(); + typename ExtractPipeType::value_type out_data; + + BatchNormpack: + #pragma unroll + for (int j = 0; j < datasize; j++) { + int norm_index; + if (CONFIG_T::n_filt == -1) + norm_index = i * datasize + j; + else + norm_index = j % CONFIG_T::n_filt; + out_data[j] = + CONFIG_T::template product::value_type::value_type, + typename CONFIG_T::scale_t::value_type>::product(in_data[j], scale[norm_index]) + + bias[norm_index]; + } + + res_pipe::write(out_data); + } +} + +// **************************************************** +// Merged Batch Normalization and Quantized Tanh +// **************************************************** +template +void normalize_binary_tanh_stream(typename CONFIG_T::threshold_t threshold) { + constexpr auto datasize = std::tuple_size::value_type>{}; + +BinaryNormLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_in / datasize; i++) { + auto in_data = data_pipe::read(); + nnet::array, CONFIG_T::n_scale_bias> out_data; + + BatchNormPack: + #pragma unroll + for (int j = 0; j < datasize; j++) { + int norm_index; + if (CONFIG_T::n_filt == -1) + norm_index = i * datasize + j; + else + norm_index = j % CONFIG_T::n_filt; + + out_data[j] = (in_data[j] >= threshold[norm_index]) ? 
1 : 0; + } + + res_pipe::write(out_data); + } +} + +template +void normalize_ternary_tanh_stream(typename CONFIG_T::threshold_hi_t threshold_hi, + typename CONFIG_T::threshold_lo_t threshold_lo) { + constexpr auto datasize = std::tuple_size::value_type>{}; + +TernaryNormLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_in / datasize; i++) { + auto in_data = data_pipe::read(); + nnet::array, CONFIG_T::n_scale_bias> out_data; + + BatchNormPack: + #pragma unroll + for (int j = 0; j < datasize; j++) { + int norm_index; + if (CONFIG_T::n_filt == -1) + norm_index = i * datasize + j; + else + norm_index = j % CONFIG_T::n_filt; + + if (in_data[j] > threshold_hi[norm_index]) + out_data[j] = 1; + else if (in_data[j] <= threshold_lo[norm_index]) + out_data[j] = -1; + else + out_data[j] = 0; + } + + res_pipe::write(out_data); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_common.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_common.h new file mode 100644 index 0000000000..f37a61cb0c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_common.h @@ -0,0 +1,76 @@ +#ifndef NNET_COMMON_H_ +#define NNET_COMMON_H_ + +#include "nnet_helpers.h" +#include +#include +#include + +typedef ac_fixed<16, 6> table_default_t; + +namespace nnet { + +// Common type definitions +enum io_type { io_parallel = 0, io_stream }; + +// Default data types (??) TODO: Deprecate +typedef ac_fixed<16, 4> weight_t_def; +typedef ac_fixed<16, 4> bias_t_def; +typedef ac_fixed<32, 10> accum_t_def; + +template void merge(data_T data1[NIN1], data_T data2[NIN2], data_T res[NIN1 + NIN2]) { + #pragma unroll + for (int ii = 0; ii < NIN1; ii++) { + res[ii] = data1[ii]; + } + #pragma unroll + for (int ii = 0; ii < NIN2; ii++) { + res[NIN1 + ii] = data2[ii]; + } +} + +/* --- + * Balanced tree reduce implementation. 
+ * For use in scenarios where Quartus cannot expression balance + * Reduces an array of inputs to a single value using the template binary operator 'Op', + * for example summing all elements with Op_add, or finding the maximum with Op_max + * Use only when the input array is fully unrolled. Or, slice out a fully unrolled section + * before applying and accumulate the result over the rolled dimension. + * --- */ +template T reduce(const T *x, Op op) { + static constexpr int leftN = pow2::val>::val > 0 ? pow2::val>::val : 0; + static constexpr int rightN = N - leftN > 0 ? N - leftN : 0; + if constexpr (N == 1) { + return x[0]; + } else if constexpr (N == 2) { + return op(x[0], x[1]); + } else { + return op(reduce(x, op), reduce(x + leftN, op)); + } +} + +// alternate reduce - basic +// template T reduce(const T *x, Op op) { +// if (N == 1) { +// return x[0]; +// } +// auto val = op(x[0], x[1]); +// for (int i = 2; i < N; i++) { +// val = op(val, x[i]); +// } +// return val; +// } + +template class Op_add { + public: + T operator()(T a, T b) { return a + b; } +}; + +template class Op_max { + public: + T operator()(T a, T b) { return a >= b ? 
a : b; } +}; + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d.h new file mode 100644 index 0000000000..38560f120c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d.h @@ -0,0 +1,61 @@ +#ifndef NNET_CONV1D_H_ +#define NNET_CONV1D_H_ + +#include "nnet_common.h" +#include "nnet_conv1d_resource.h" + +namespace nnet { + +struct conv1d_config { + // I/O sizes + static const unsigned in_width = 10; + static const unsigned out_width = 10; + + // Number of channels, filters + static const unsigned n_chan = 1; + static const unsigned n_filt = 1; + + // Original filter size + static const unsigned filt_width = 1; + static const unsigned kernel_size = filt_width; + + // Modified filter size (post-Wionograd transformation, if applied) + static const unsigned impl_filt_height = 1; + static const unsigned impl_filt_width = 1; + + // Padding, stride, dilation + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; + static const unsigned stride_width = 1; + static const unsigned dilation = 1; + + // Run-time Configuration + static const unsigned n_zeros = 0; + static const unsigned reuse_factor = 1; + static const unsigned parallelization_factor = 1; + + // TODO: BRAM Storage on Quartus + static const bool store_weights_in_bram = false; + + // Internal data type definitions + typedef float bias_t; + typedef float weight_t; + typedef float accum_t; +}; + +template +void conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + conv_1d_resource_cl(data, res, weights, biases); +} + +template +void pointwise_conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + assert(CONFIG_T::filt_width == 1); + pointwise_conv_1d_resource_cl(data, res, weights, 
biases); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_resource.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_resource.h new file mode 100644 index 0000000000..85009d4a3a --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_resource.h @@ -0,0 +1,237 @@ +#ifndef NNET_CONV1D_RESOURCE_H_ +#define NNET_CONV1D_RESOURCE_H_ + +#include "nnet_common.h" +#include "nnet_dense.h" + +namespace nnet { + +enum class conv1d_implementation { combination, im2col, winograd }; + +// **************************************************************** +// im2col - General-purpose 1D Convolution algorithm +// **************************************************************** + +template +void im2col_1d_cl(const data_T &data, data_col_T &data_col, const int col) { + // im2col can be unrolled fully, since number of parallel executions = filt_w x n_chann ~ O(100) and very little DSP + // usage + + [[intel::fpga_register]] int index = 0; + +KernelLoop: + #pragma unroll + for (int kernel_col = 0; kernel_col < CONFIG_T::impl_filt_width; kernel_col++) { + ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + [[intel::fpga_register]] int index_data = + (col * CONFIG_T::stride_width + kernel_col - CONFIG_T::pad_left) * CONFIG_T::n_chan + channel; + if (index_data >= 0 && index_data < CONFIG_T::in_width * CONFIG_T::n_chan) { + data_col[index++] = data[index_data]; + } else { + data_col[index++] = 0; + } + } + } +} + +template +void conv_1d_im2col_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + // im2col performs no filter transformations; therefore, filter size remains constant + assert(CONFIG_T::filt_width == CONFIG_T::impl_filt_width); + + // Unroll factor for loop traversing input image, derived from parallelization_factor + static constexpr int pf = 
MIN(CONFIG_T::parallelization_factor, CONFIG_T::out_width); + + using data_col_T = array; + using res_col_T = array; + +ColLoop: + #pragma unroll pf + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int i = 0; i < CONFIG_T::out_width; i++) { + // Loop variables should always be declared in the deepest scope available + // See Intel's HLS - Loop Best Practices + // https://www.intel.com/content/www/us/en/docs/programmable/683152/22-2/declare-variables-in-the-deepest-scope.html + + [[intel::fpga_register]] data_col_T data_col; + im2col_1d_cl(data, data_col, i); + + [[intel::fpga_register]] res_col_T res_col; + dense_resource(data_col, res_col, weights, biases); + + // Unroll fully, since + // (1) n_filt is usually low in io_parallel (< 32) + // (2) no complex operations handled in loop, this loop performs a simple register writing operation + FiltLoop: + #pragma unroll + for (int j = 0; j < CONFIG_T::n_filt; j++) { + res[i * CONFIG_T::n_filt + j] = res_col[j]; + } + } +} + +// **************************************************************** +// 1D Convolution for 3x1 kernels from Winograd's algoirithm +// **************************************************************** + +// Explicity transofrmed input (B'dB) needed for Winograd convolution, as explained by Lavin & Gray (2015) +template +inline void winograd_transform_input_tile_3x1_kernel(const data_T I[4], res_T D[4]) { + D[0] = I[0] - I[2]; + D[1] = I[1] + I[2]; + D[2] = -I[1] + I[2]; + D[3] = I[1] - I[3]; +} + +template +void winograd_conv1d_3x1_kernel_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + // Ensure Winograd conditions are met + assert(CONFIG_T::filt_width == 3); + assert(CONFIG_T::stride_width == 1); + assert(CONFIG_T::out_width > 2); + + // Unroll factor for loop traversing input image, derived from parallelization_factor + static constexpr int pf = MIN(CONFIG_T::parallelization_factor, CONFIG_T::out_width); + + 
// Initialise result to bias + // Unroll fully, as loop performs a simple operation - assigning the outputs to a constant value + #pragma unroll + for (int i = 0; i < CONFIG_T::out_width; i++) { + int offset = CONFIG_T::n_filt * i; + #pragma unroll + for (int f = 0; f < CONFIG_T::n_filt; f++) { + res[offset + f] = static_cast(biases[f]); + } + } + +WidthLoop: + #pragma unroll pf + for (int col = 0; col < CONFIG_T::out_width; col += 2) { + ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + // Get current 4x1 tile + [[intel::fpga_register]] typename data_T::value_type T[16]; + [[intel::fpga_register]] uint8_t p = 0; + + #pragma unroll + for (int c = col - (int)CONFIG_T::pad_left; c < col + 4 - (int)CONFIG_T::pad_left; c++) { + if (c < CONFIG_T::in_width && c >= 0) { + T[p++] = data[c * CONFIG_T::n_chan + channel]; + } else { + T[p++] = 0; + } + } + + // Transform input tile + [[intel::fpga_register]] typename CONFIG_T::accum_t D[4]; + winograd_transform_input_tile_3x1_kernel(T, D); + + #pragma unroll + for (int filter = 0; filter < CONFIG_T::n_filt; filter++) { + [[intel::fpga_register]] int filter_offset = 4 * (CONFIG_T::n_chan * filter + channel); + + // Hadamard product between transformed input tile and kernel + [[intel::fpga_register]] typename CONFIG_T::accum_t Y[4]; + #pragma unroll + for (int i = 0; i < 4; i++) { + Y[i] = static_cast(D[i] * weights[filter_offset + i]); + } + + // Explicitly transform intermediate result Z = A'YA and save to output + res[CONFIG_T::n_filt * col + filter] += static_cast(Y[0] + Y[1] + Y[2]); + if ((col + 1) < CONFIG_T::out_width) + res[CONFIG_T::n_filt * (col + 1) + filter] += + static_cast(Y[1] - Y[2] - Y[3]); + } + } + } +} + +// **************************************************************** +// 1D Convolution for 1x1 kernels using optimized im2col +// **************************************************************** + +template +void im2col_1d_pointwise_cl(const data_T &data, 
data_col_T &data_col, const int col) { + // pointwise_im2col can be unrolled fully, only one loop with n_chan iterations + + [[intel::fpga_register]] int index = 0; + +ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + [[intel::fpga_register]] int index_data = + (col * CONFIG_T::stride_width - CONFIG_T::pad_left) * CONFIG_T::n_chan + channel; + if (index_data >= 0 && index_data < CONFIG_T::in_width * CONFIG_T::n_chan) { + data_col[index++] = data[index_data]; + } else { + data_col[index++] = 0; + } + } +} + +template +void pointwise_conv_1d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + assert(CONFIG_T::filt_width == 1); + + // Unroll factor for loop traversing input image, derived from parallelization_factor + static constexpr int pf = MIN(CONFIG_T::parallelization_factor, CONFIG_T::out_width); + + using data_col_T = array; + using res_col_T = array; + +ColLoop: + #pragma unroll pf + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int col = 0; col < CONFIG_T::out_width; col++) { + // Loop variables should always be declared in the deepest scope available + // See Intel's HLS - Loop Best Practices + // https://www.intel.com/content/www/us/en/docs/programmable/683152/22-2/declare-variables-in-the-deepest-scope.html + + [[intel::fpga_register]] data_col_T data_col; + im2col_1d_pointwise_cl(data, data_col, col); + + [[intel::fpga_register]] res_col_T res_col; + dense_resource(data_col, res_col, weights, biases); + + // Unroll fully, since + // (1) n_filt is usually low in io_parallel (< 32) + // (2) no complex operations handled in loop, this loop performs a simple register writing operation + FiltLoop: + #pragma unroll + for (int k = 0; k < CONFIG_T::n_filt; k++) { + res[col * CONFIG_T::n_filt + k] = res_col[k]; + } + } +} + +// **************************************************************** +// Top-level function - 
handles different implementations +// **************************************************************** +template +void conv_1d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + static constexpr bool winograd_conditions = + // Winograd's minimal filtering algorithm not applicable to stride != 1 + CONFIG_T::stride_width == 1 && + + // Intel HLS will fail to pipeline the entire component if the Winograd loop only runs once + CONFIG_T::out_width > 2 && + + // Verify user opted for Winograd + (CONFIG_T::implementation == nnet::conv1d_implementation::combination || + CONFIG_T::implementation == nnet::conv1d_implementation::winograd); + + if (CONFIG_T::filt_width == 3 && winograd_conditions) { + winograd_conv1d_3x1_kernel_cl(data, res, weights, biases); + } else { + conv_1d_im2col_cl(data, res, weights, biases); + } +} + +} // namespace nnet +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_stream.h new file mode 100644 index 0000000000..1ffd11774f --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv1d_stream.h @@ -0,0 +1,177 @@ +#ifndef NNET_CONV1D_STREAM_H_ +#define NNET_CONV1D_STREAM_H_ + +#include "nnet_dense.h" +#include "nnet_types.h" + +namespace nnet { + +/* + * void kernel_shift(shift_buffer, kernel_window) + * + * Args: + * shift_buffer - array elements popped from the line the buffer during the shift line buffer operation + * kernel_window - array of values from the input curently being convolved with the kernel + * + * Values from shift_buffer are inserted into kernel_window, updating the values to be convolved + */ +template +void kernel_shift_1d(typename data_T::value_type shift_buffer[CONFIG_T::n_chan], data_window_T &kernel_window) { +/* + * Manually shift kernel_window by one step to the left + * Not possible to use nnet::shift_reg as 
the kernel window is convolved with the kernel weights using dense matrix + * multiplication Dense matrix multiplication is only implemented for arrays However, provided certain timing constrains are + * met, Intel HLS automatically infers a shift operation and implements kernel_window as a shift register To verify, see + * synthesis report in report.html > Area Analysis of System + */ +KernelShiftWidth: + #pragma unroll + for (int col = 0; col < CONFIG_T::filt_width - 1; col++) { + KernelShiftChannel: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + kernel_window[col * CONFIG_T::n_chan + channel] = kernel_window[(col + 1) * CONFIG_T::n_chan + channel]; + } + } + +// Insert shift_buffer values into the last column of the kernel window +KernelPushChannel: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + kernel_window[(CONFIG_T::filt_width - 1) * CONFIG_T::n_chan + channel] = shift_buffer[channel]; + } +} + +/* + * void shift_line_buffer(in_element, line_buffer, shift_buffer) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels line_buffer - chained array of shift registers, one for each row of the kernel and channel shift_buffer - + * array elements popped from the line the buffer during the shift operation + * + * Values from in_element are inserted into the line buffer, causing all other elements to be shifted by one + * Popped elements are later used to update the kernel window, during the kernel_shift operation + */ +template +void shift_line_buffer_1d( + const data_T &in_elem, + nnet::shift_reg + line_buffer[CONFIG_T::n_chan], + typename data_T::value_type shift_buffer[CONFIG_T::n_chan]) { +// For every channel, insert the incoming pixel at end of the shift buffer +UpdateBuffer: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + shift_buffer[channel] = 
in_elem[channel]; + } +} + +/* + * void compute_output_buffer(in_element, res_stream, line_buffer, kernel_window, weights, biases) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels res_stream - output stream, passed by reference to allow direct writing line_buffer - chained array of shift + * registers, one for each row of the kernel and channel kernel_window - array of values from the input curently convolved + * with the kernel weights - Conv1D layer weights biases - Conv1D layer biases + * + * Function executes 4 steps: + * (1) Shift line buffer - updates the contents of the chained shift registers, inserting the new inputs and removing last + * elements (2) Kernel shift - updates the elements of the kernel window, by storing the new inputs and popped elements from + * the line buffer (3) Matrix mulitplication - performs dense matrix multiplication between the current input window and + * kernel weights (4) Counter housekeeping - keeps track of current pixel and stride + */ +template +void compute_output_buffer_1d( + const data_T &in_elem, + nnet::shift_reg + line_buffer[CONFIG_T::n_chan], + data_window_T &kernel_window, const typename CONFIG_T::weight_t &weights, const typename CONFIG_T::bias_t &biases, + int &pX, int &sX) { + + using res_T = typename ExtractPipeType::value_type; + + // Thresholds + constexpr int lShiftX = CONFIG_T::filt_width - 1; + + // Step 1 - Shift line buffer + [[intel::fpga_register]] typename data_T::value_type shift_buffer[CONFIG_T::n_chan]; + nnet::shift_line_buffer_1d(in_elem, line_buffer, shift_buffer); + + // Step 2 - Kernel shift + nnet::kernel_shift_1d(shift_buffer, kernel_window); + + // Check to see if we have a full kernel + if ((sX - lShiftX) == 0 && pX > (lShiftX - 1)) { + // Step 3 - Dense matrix multiplication + [[intel::fpga_register]] res_T res_out; + dense_resource(kernel_window, res_out, weights, biases); + + // 
Write result to output stream + [[intel::fpga_register]] res_T res_pack; + CastLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_filt; channel++) { + res_pack[channel] = res_out[channel]; + } + res_pipe::write(res_pack); + } + + // Reached end of image + if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right)) { + pX = 0; + sX = 0; + // Move to the right + } else { + pX++; + sX = ((sX - lShiftX) == 0) ? (sX - CONFIG_T::stride_width + 1) : (sX + 1); + } +} + +template +void conv_1d_cl_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::bias_t biases) { + + using data_arr_T = typename ExtractPipeType::value_type; + using data_element_T = typename data_arr_T::value_type; + using data_window_T = array; + + // Line buffer and kernel window + [[intel::fpga_register]] nnet::shift_reg + line_buffer[CONFIG_T::n_chan]; + [[intel::fpga_register]] data_window_T kernel_window; + + // An array of length CONFIG_T::n_chan, with elements set to zero (padding for each channel) + constexpr auto padds = zero_array(); + + // move former static variables outside the function calls + // X position pixel + int pX = 0; + // X strides + int sX = 0; + +// Input image left-side padding +PaddingLeftWidth: + for (int col = 0; col < CONFIG_T::pad_left; col++) { + compute_output_buffer_1d(padds, line_buffer, kernel_window, weights, + biases, pX, sX); + } + +// Read input image +ReadInputWidth: + for (int col = 0; col < CONFIG_T::in_width; col++) { + compute_output_buffer_1d(data_pipe::read(), line_buffer, + kernel_window, weights, biases, pX, sX); + } + +// Input image right-side padding +PaddingRightWidth: + for (int col = 0; col < CONFIG_T::pad_right; col++) { + compute_output_buffer_1d(padds, line_buffer, kernel_window, weights, + biases, pX, sX); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d.h new file 
mode 100644 index 0000000000..79b1508c5f --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d.h @@ -0,0 +1,67 @@ +#ifndef NNET_CONV2D_H_ +#define NNET_CONV2D_H_ + +#include "nnet_conv2d_resource.h" + +namespace nnet { + +struct conv2d_config { + // I/O sizes + static const unsigned in_height = 10; + static const unsigned in_width = 10; + static const unsigned out_height = 10; + static const unsigned out_width = 10; + + // Number of channels, filters + static const unsigned n_chan = 1; + static const unsigned n_filt = 1; + + // Original filter size + static const unsigned filt_height = 1; + static const unsigned filt_width = 1; + static const unsigned kernel_size = filt_height * filt_width; + + // Modified filter size (post-Wionograd transformation, if applied) + static const unsigned impl_filt_height = 1; + static const unsigned impl_filt_width = 1; + + // Padding, stride, dilation + static const unsigned pad_top = 0; + static const unsigned pad_bottom = 0; + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; + static const unsigned stride_height = 1; + static const unsigned stride_width = 1; + static const unsigned dilation_height = 1; + static const unsigned dilation_width = 1; + + // Run-time configuration + static const unsigned n_zeros = 0; + static const unsigned reuse_factor = 1; + static const unsigned parallelization_factor = 1; + + // TODO: BRAM Storage on Quartus + static const bool store_weights_in_bram = false; + + // Internal data type definitions + typedef float bias_t; + typedef float weight_t; + typedef float accum_t; +}; + +template +void conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + conv_2d_resource_cl(data, res, weights, biases); +} + +template +void pointwise_conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + 
assert(CONFIG_T::filt_height == 1 && CONFIG_T::filt_width == 1); + pointwise_conv_2d_resource_cl(data, res, weights, biases); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_resource.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_resource.h new file mode 100644 index 0000000000..7265d90e1c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_resource.h @@ -0,0 +1,297 @@ +#ifndef NNET_CONV2D_RESOURCE_H_ +#define NNET_CONV2D_RESOURCE_H_ + +#include "nnet_common.h" +#include "nnet_dense.h" +#include "nnet_helpers.h" + +namespace nnet { + +enum class conv2d_implementation { combination, im2col, winograd }; + +// **************************************************************** +// im2col - General-purpose 2D Convolution algorithm +// **************************************************************** + +template +void im2col_2d_cl(const data_T &data, data_col_T &data_col, const int row, const int col) { + // im2col can be unrolled fully, since number of parallel executions = filt_h x filt_w x n_chann ~ O(100) and very little + // DSP usage + + [[intel::fpga_register]] int index = 0; + +FiltHeightLoop: + #pragma unroll + for (int kernel_row = 0; kernel_row < CONFIG_T::impl_filt_height; kernel_row++) { + [[intel::fpga_register]] int input_row = + -CONFIG_T::pad_top + kernel_row * CONFIG_T::dilation_height + row * CONFIG_T::stride_height; + + FiltWidthLoop: + #pragma unroll + for (int kernel_col = 0; kernel_col < CONFIG_T::impl_filt_width; kernel_col++) { + [[intel::fpga_register]] int input_col = + -CONFIG_T::pad_left + kernel_col * CONFIG_T::dilation_width + col * CONFIG_T::stride_width; + + ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + if (input_row >= 0 && input_row < CONFIG_T::in_height && input_col >= 0 && input_col < CONFIG_T::in_width) { + data_col[index++] = + data[input_row * CONFIG_T::in_width * 
CONFIG_T::n_chan + input_col * CONFIG_T::n_chan + channel]; + } else { + data_col[index++] = 0; + } + } + } + } +} + +template +void conv_2d_im2col_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + // im2col performs no filter transformations; therefore, filter size remains constant + assert(CONFIG_T::filt_height == CONFIG_T::impl_filt_height && CONFIG_T::filt_width == CONFIG_T::impl_filt_width); + + // Unroll factors for loop traversing input image, derived from parallelization_factor + // Outer loop only gets unrolled after inner loop is fully unrolled + static constexpr int pfc = MIN(CONFIG_T::parallelization_factor, CONFIG_T::out_width); + static constexpr int pfr = MIN((CONFIG_T::parallelization_factor / pfc), CONFIG_T::out_height); + + using data_col_T = + array; + using res_col_T = array; + +HeightLoop: + #pragma unroll pfr + for (int i = 0; i < CONFIG_T::out_height; i++) { + WidthLoop: + #pragma unroll pfc + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < CONFIG_T::out_width; j++) { + // Loop variables should always be declared in the deepest scope available + // See Intel's HLS - Loop Best Practices + // https://www.intel.com/content/www/us/en/docs/programmable/683152/22-2/declare-variables-in-the-deepest-scope.html + + [[intel::fpga_register]] data_col_T data_col; + im2col_2d_cl(data, data_col, i, j); + + [[intel::fpga_register]] res_col_T res_col; + dense_resource(data_col, res_col, weights, biases); + + // Unroll fully, since + // (1) n_filt is usually low in io_parallel (< 32) + // (2) no complex operations handled in loop, this loop performs a simple register writing operation + FiltLoop: + #pragma unroll + for (int k = 0; k < CONFIG_T::n_filt; k++) { + res[i * CONFIG_T::out_width * CONFIG_T::n_filt + j * CONFIG_T::n_filt + k] = res_col[k]; + } + } + } +} + +// **************************************************************** +// 2D Convolution for 
3x3 kernels from Winograd's algoirithm +// **************************************************************** + +// Explicity transofrmed input (B'dB) needed for Winograd calculation, as explained by Lavin & Gray, 2015 +template +inline void winograd_transform_input_tile_3x3_kernel(const data_T I[16], res_T D[16]) { + D[0] = I[0] - I[2] - I[8] + I[10]; + D[1] = I[1] + I[2] - I[9] - I[10]; + D[2] = -I[1] + I[2] + I[9] - I[10]; + D[3] = I[1] - I[3] - I[9] + I[11]; + + D[4] = I[4] - I[6] + I[8] - I[10]; + D[5] = I[5] + I[6] + I[9] + I[10]; + D[6] = -I[5] + I[6] - I[9] + I[10]; + D[7] = I[5] - I[7] + I[9] - I[11]; + + D[8] = -I[4] + I[6] + I[8] - I[10]; + D[9] = -I[5] - I[6] + I[9] + I[10]; + D[10] = I[5] - I[6] - I[9] + I[10]; + D[11] = -I[5] + I[7] + I[9] - I[11]; + + D[12] = I[4] - I[6] - I[12] + I[14]; + D[13] = I[5] + I[6] - I[13] - I[14]; + D[14] = I[6] - I[5] + I[13] - I[14]; + D[15] = I[5] - I[7] - I[13] + I[15]; +} + +template +void winograd_conv2d_3x3_kernel_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + // Ensure Winograd conditions are met + assert(CONFIG_T::filt_height == 3 && CONFIG_T::filt_width == 3); + assert(CONFIG_T::stride_height == 1 && CONFIG_T::stride_width == 1); + assert(CONFIG_T::pad_left == CONFIG_T::pad_right && CONFIG_T::pad_top == CONFIG_T::pad_bottom); + assert(CONFIG_T::out_height > 2 && CONFIG_T::out_width > 2); + + // Unroll factor for loop traversing input image, derived from parallelization_factor + // Outer loop only gets unrolled after inner loop is fully unrolled + static constexpr int pfc = MIN(CONFIG_T::parallelization_factor, DIV_ROUNDUP(CONFIG_T::out_width, 2)); + static constexpr int pfr = MIN((CONFIG_T::parallelization_factor / pfc), DIV_ROUNDUP(CONFIG_T::out_height, 2)); + + // Initialise result to bias + // Unroll fully, as loop performs a simple operation - assigning the outputs to a constant value + #pragma unroll + for (int i = 0; i < 
CONFIG_T::out_height * CONFIG_T::out_width; i++) { + int offset = CONFIG_T::n_filt * i; + #pragma unroll + for (int f = 0; f < CONFIG_T::n_filt; f++) { + res[offset + f] = static_cast(biases[f]); + } + } + +HeightLoop: + #pragma unroll pfr + for (int row = 0; row < CONFIG_T::out_height; row += 2) { + WidthLoop: + #pragma unroll pfc + for (int col = 0; col < CONFIG_T::out_width; col += 2) { + ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + // Get current 4x4 tile + [[intel::fpga_register]] typename data_T::value_type T[16]; + [[intel::fpga_register]] typename CONFIG_T::accum_t D[16]; + [[intel::fpga_register]] uint8_t p = 0; + + #pragma unroll + for (int r = row - (int)CONFIG_T::pad_top; r < row + 4 - (int)CONFIG_T::pad_top; r++) { + #pragma unroll + for (int c = col - (int)CONFIG_T::pad_left; c < col + 4 - (int)CONFIG_T::pad_left; c++) { + if (r < CONFIG_T::in_height && r >= 0 && c < CONFIG_T::in_width && c >= 0) { + T[p++] = data[r * CONFIG_T::in_width * CONFIG_T::n_chan + c * CONFIG_T::n_chan + channel]; + } else { + T[p++] = 0; + } + } + } + + // Transform input tile + winograd_transform_input_tile_3x3_kernel(T, D); + + #pragma unroll + for (int filter = 0; filter < CONFIG_T::n_filt; filter++) { + [[intel::fpga_register]] int filter_offset = 16 * (CONFIG_T::n_chan * filter + channel); + + // Hadamard product between transformed input tile and kernel + [[intel::fpga_register]] typename CONFIG_T::accum_t Y[16]; + #pragma unroll + for (int i = 0; i < 16; i++) { + Y[i] = static_cast(D[i] * weights[filter_offset + i]); + } + + // Explicitly transform intermediate result Z = A'YA and save to output + res[CONFIG_T::n_filt * (row * CONFIG_T::out_width + col) + filter] += + static_cast(Y[0] + Y[1] + Y[2] + Y[4] + Y[5] + Y[6] + Y[8] + Y[9] + + Y[10]); + if ((col + 1) < CONFIG_T::out_width) // column guard: second tile column must lie inside out_width (was out_height) + res[CONFIG_T::n_filt * (row * CONFIG_T::out_width + (col + 1)) + filter] += + static_cast(Y[1] - Y[2] - Y[3] + Y[5] - Y[6] - Y[7] + 
Y[9] - Y[10] - + Y[11]); + if ((row + 1) < CONFIG_T::out_height) // row guard: second tile row must lie inside out_height (was out_width) + res[CONFIG_T::n_filt * ((row + 1) * CONFIG_T::out_width + col) + filter] += + static_cast(Y[4] + Y[5] + Y[6] - Y[8] - Y[9] - Y[10] - Y[12] - + Y[13] - Y[14]); + if ((row + 1) < (CONFIG_T::out_height) && (col + 1) < CONFIG_T::out_width) // rows are bounded by out_height, columns by out_width + res[CONFIG_T::n_filt * ((row + 1) * CONFIG_T::out_width + (col + 1)) + filter] += + static_cast(Y[5] - Y[6] - Y[7] - Y[9] + Y[10] + Y[11] + Y[15] - + Y[13] + Y[14]); + } + } + } + } +} + +// **************************************************************** +// 2D Convolution for 1x1 kernels using optimized im2col +// **************************************************************** + +template +void im2col_2d_pointwise_cl(const data_T &data, data_col_T &data_col, const int row, const int col) { + // pointwise_im2col can be unrolled fully, only one loop with n_chan iterations + + [[intel::fpga_register]] int index = 0; + +ChannelLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + + [[intel::fpga_register]] int input_row = -CONFIG_T::pad_top + row * CONFIG_T::stride_height; + [[intel::fpga_register]] int input_col = -CONFIG_T::pad_left + col * CONFIG_T::stride_width; + + if (input_row >= 0 && input_row < CONFIG_T::in_height && input_col >= 0 && input_col < CONFIG_T::in_width) { + data_col[index++] = + data[input_row * CONFIG_T::in_width * CONFIG_T::n_chan + input_col * CONFIG_T::n_chan + channel]; + } else { + data_col[index++] = 0; + } + } +} + +template +void pointwise_conv_2d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + assert(CONFIG_T::filt_height == 1 && CONFIG_T::filt_width == 1); + + // Unroll factors for loop traversing input image, derived from parallelization_factor + // Outer loop only gets unrolled after inner loop is fully unrolled + static constexpr int pfc = MIN(CONFIG_T::parallelization_factor, CONFIG_T::out_width); + static 
constexpr int pfr = MIN((CONFIG_T::parallelization_factor / pfc), CONFIG_T::out_height); + + using data_col_T = array; + using res_col_T = array; + +HeightLoop: + #pragma unroll pfr + for (int row = 0; row < CONFIG_T::out_height; row++) { + WidthLoop: + #pragma unroll pfc + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int col = 0; col < CONFIG_T::out_width; col++) { + // Loop variables should always be declared in the deepest scope available + // See Intel's HLS - Loop Best Practices + // https://www.intel.com/content/www/us/en/docs/programmable/683152/22-2/declare-variables-in-the-deepest-scope.html + + [[intel::fpga_register]] data_col_T data_col; + im2col_2d_pointwise_cl(data, data_col, row, col); + + [[intel::fpga_register]] res_col_T res_col; + dense_resource(data_col, res_col, weights, biases); + + FiltLoop: + #pragma unroll + for (int k = 0; k < CONFIG_T::n_filt; k++) { + res[row * CONFIG_T::out_width * CONFIG_T::n_filt + col * CONFIG_T::n_filt + k] = res_col[k]; + } + } + } +} + +// **************************************************************** +// Top-level function - handles different implementations +// **************************************************************** +template +void conv_2d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + static constexpr bool winograd_conditions = + // Winograd's minimal filtering algorithm not applicable to stride != 1 + CONFIG_T::stride_height == 1 && CONFIG_T::stride_width == 1 && + + // Intel HLS will fail to pipeline the entire component if the Winograd loop only runs once + CONFIG_T::out_height > 2 && CONFIG_T::out_width > 2 && + + // Verify user opted for Winograd + (CONFIG_T::implementation == nnet::conv2d_implementation::combination || + CONFIG_T::implementation == nnet::conv2d_implementation::winograd); + + if (CONFIG_T::filt_height == 3 && CONFIG_T::filt_width == 3 && winograd_conditions) { + 
winograd_conv2d_3x3_kernel_cl(data, res, weights, biases); + } else { + conv_2d_im2col_cl(data, res, weights, biases); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_stream.h new file mode 100644 index 0000000000..08f0eaa872 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_conv2d_stream.h @@ -0,0 +1,241 @@ +#ifndef NNET_CONV2D_STREAM_H_ +#define NNET_CONV2D_STREAM_H_ + +#include "nnet_dense.h" +#include "nnet_types.h" + +namespace nnet { + +/* + * void kernel_shift(shift_buffer, kernel_window) + * + * Args: + * shift_buffer - array elements popped from the line the buffer during the shift line buffer operation + * kernel_window - array of values from the input curently being convolved with the kernel + * + * Values from shift_buffer are inserted into kernel_window, updating the values to be convolved + */ +template +void kernel_shift_2d(typename data_T::value_type shift_buffer[CONFIG_T::filt_height][CONFIG_T::n_chan], + data_window_T &kernel_window) { +/* + * Manually shift kernel_window by one step to the left + * Not possible to use nnet::shift_reg as the kernel window is convolved with the kernel weights using dense matrix + * multiplication Dense matrix multiplication is only implemented for arrays However, provided certain timing constrains are + * met, Intel HLS automatically infers a shift operation and implements kernel_window as a shift register To verify, see + * synthesis report in report.html > Area Analysis of System + */ +KernelShiftWidth: + #pragma unroll + for (int col = 0; col < CONFIG_T::filt_width - 1; col++) { + KernelShiftHeight: + #pragma unroll + for (int row = 0; row < CONFIG_T::filt_height; row++) { + KernelShiftChannel: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + kernel_window[row * CONFIG_T::filt_width * CONFIG_T::n_chan + col * 
CONFIG_T::n_chan + channel] = + kernel_window[row * CONFIG_T::filt_width * CONFIG_T::n_chan + (col + 1) * CONFIG_T::n_chan + channel]; + } + } + } + +// Insert shift_buffer values into the last column of the kernel window +KernelPushHeight: + #pragma unroll + for (int col = 0; col < CONFIG_T::filt_height; col++) { + KernelPushChannel: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + kernel_window[(CONFIG_T::filt_width - 1) * CONFIG_T::n_chan + col * CONFIG_T::filt_width * CONFIG_T::n_chan + + channel] = shift_buffer[col][channel]; + } + } +} + +/* + * void shift_line_buffer(in_element, line_buffer, shift_buffer) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels line_buffer - chained array of shift registers, one for each row of the kernel and channel shift_buffer - + * array elements popped from the line the buffer during the shift operation + * + * Values from in_element are inserted into the line buffer, causing all other elements to be shifted by one + * Popped elements are later used to update the kernel window, during the kernel_shift operation + */ +template +void shift_line_buffer_2d( + const data_T &in_elem, + nnet::shift_reg + line_buffer[MAX(CONFIG_T::filt_height - 1, 1)][CONFIG_T::n_chan], + typename data_T::value_type shift_buffer[CONFIG_T::filt_height][CONFIG_T::n_chan]) { +// For every channel, insert the incoming pixel at end of the shift buffer +UpdateBuffer: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + shift_buffer[CONFIG_T::filt_height - 1][channel] = in_elem[channel]; + } + +// Shift line buffer and save popped values to shift buffer +LineBufferDataIn: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_chan; channel++) { + LineBufferShift: + #pragma unroll + for (unsigned col = 1; col < CONFIG_T::filt_height; col++) { + // Shift the line buffer, return the 
popped pixel + typename data_T::value_type pop = + line_buffer[col - 1][channel].shift(shift_buffer[CONFIG_T::filt_height - col][channel]); + + // Place popped pixed into the shift buffer, one row above + shift_buffer[CONFIG_T::filt_height - col - 1][channel] = pop; + } + } +} + +/* + * void compute_output_buffer(in_element, res_stream, line_buffer, kernel_window, weights, biases) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels res_stream - output stream, passed by reference to allow direct writing line_buffer - chained array of shift + * registers, one for each row of the kernel and channel kernel_window - array of values from the input curently convolved + * with the kernel weights - Conv1D/Conv2D layer weights biases - Conv1D/Conv2D layer biases + * + * Function executes 4 steps: + * (1) Shift line buffer - updates the contents of the chained shift registers, inserting the new inputs and removing last + * elements (2) Kernel shift - updates the elements of the kernel window, by storing the new inputs and popped elements from + * the line buffer (3) Matrix mulitplication - performs dense matrix multiplication between the current input window and + * kernel weights (4) Counter housekeeping - keeps track of current pixel and stride + */ +template +void compute_output_buffer_2d( + const data_T &in_elem, + nnet::shift_reg + line_buffer[MAX(CONFIG_T::filt_height - 1, 1)][CONFIG_T::n_chan], + data_window_T &kernel_window, const typename CONFIG_T::weight_t &weights, const typename CONFIG_T::bias_t &biases, + int &pX, int &pY, int &sX, int &sY) { + + using res_T = typename ExtractPipeType::value_type; + + // Thresholds + constexpr int lShiftX = CONFIG_T::filt_width - 1; + constexpr int lShiftY = CONFIG_T::filt_height - 1; + + // Step 1 - Shift line buffer + [[intel::fpga_register]] typename data_T::value_type shift_buffer[CONFIG_T::filt_height][CONFIG_T::n_chan]; + 
nnet::shift_line_buffer_2d(in_elem, line_buffer, shift_buffer); + + // Step 2 - Kernel shift + nnet::kernel_shift_2d(shift_buffer, kernel_window); + + // Check to see if we have a full kernel + if ((sX - lShiftX) == 0 && (sY - lShiftY) == 0 && pY > (lShiftY - 1) && pX > (lShiftX - 1)) { + // Step 3 - Dense matrix multiplication + [[intel::fpga_register]] res_T res_out; + dense_resource(kernel_window, res_out, weights, biases); + + // Write result to output stream + [[intel::fpga_register]] res_T res_pack; + CastLoop: + #pragma unroll + for (int channel = 0; channel < CONFIG_T::n_filt; channel++) { + res_pack[channel] = res_out[channel]; + } + res_pipe::write(res_pack); + } + + // Reached end of image + if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right) && + (pY + 1) == (CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom)) { + pX = 0; + sX = 0; + pY = 0; + sY = 0; + // Reached end of row + } else if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right)) { + pX = 0; + sX = 0; + pY++; + sY = ((sY - lShiftY) == 0) ? (sY - CONFIG_T::stride_height + 1) : (sY + 1); + // Same row, same colum, therefore, move to the right + } else { + pX++; + sX = ((sX - lShiftX) == 0) ? 
(sX - CONFIG_T::stride_width + 1) : (sX + 1); + } +} + +template +void conv_2d_cl_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::bias_t biases) { + + using data_arr_T = typename ExtractPipeType::value_type; + using data_element_T = typename data_arr_T::value_type; + using data_window_T = array; + + // Line buffer and kernel window + [[intel::fpga_register]] nnet::shift_reg + line_buffer[MAX(CONFIG_T::filt_height - 1, 1)][CONFIG_T::n_chan]; + [[intel::fpga_register]] data_window_T kernel_window; + + // An array of length CONFIG_T::n_chan, with elements set to zero (padding for each channel) + constexpr auto padds = zero_array(); + + // move former static variables outside the function calls + // X position pixel + int pX = 0; + // Y position pixel + int pY = 0; + // X strides + int sX = 0; + // Y strides + int sY = 0; + +// Padding above input image +PaddingTopHeight: + [[intel::loop_coalesce(2)]] for (int row = 0; row < CONFIG_T::pad_top; row++) { + PaddingTopWidth: + for (int col = 0; col < CONFIG_T::pad_left + CONFIG_T::in_width + CONFIG_T::pad_right; col++) { + compute_output_buffer_2d(padds, line_buffer, kernel_window, + weights, biases, pX, pY, sX, sY); + } + } + +ReadInputHeight: + [[intel::loop_coalesce(2)]] for (int row = 0; row < CONFIG_T::in_height; row++) { + // Input image left-side padding + PaddingLeftWidth: + for (int col = 0; col < CONFIG_T::pad_left; col++) { + compute_output_buffer_2d(padds, line_buffer, kernel_window, + weights, biases, pX, pY, sX, sY); + } + + // Read input image + ReadInputWidth: + for (int col = 0; col < CONFIG_T::in_width; col++) { + compute_output_buffer_2d( + data_pipe::read(), line_buffer, kernel_window, weights, biases, pX, pY, sX, sY); + } + + // Input image right-side padding + PaddingRightWidth: + for (int col = 0; col < CONFIG_T::pad_right; col++) { + compute_output_buffer_2d(padds, line_buffer, kernel_window, + weights, biases, pX, pY, sX, sY); + } + } + +// Padding below input image 
+PaddingBottomHeight: + [[intel::loop_coalesce(2)]] for (int row = 0; row < CONFIG_T::pad_bottom; row++) { + PaddingBottomWidth: + for (int col = 0; col < CONFIG_T::pad_left + CONFIG_T::in_width + CONFIG_T::pad_right; col++) { + compute_output_buffer_2d(padds, line_buffer, kernel_window, + weights, biases, pX, pY, sX, sY); + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense.h new file mode 100644 index 0000000000..dc76189083 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense.h @@ -0,0 +1,164 @@ +#ifndef NNET_DENSE_LARGE_H_ +#define NNET_DENSE_LARGE_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" +#include "nnet_mult.h" +#include + +namespace nnet { + +struct dense_config { + // Internal data type definitions + typedef float bias_t; + typedef float weight_t; + typedef float accum_t; + + // Layer Sizes + static const unsigned n_in = 10; + static const unsigned n_out = 10; + + static const unsigned reuse_factor = 1; + static const unsigned block_factor = 1; // DIV_ROUNDUP(CONFIG_T::n_in*CONFIG_T::n_out, CONFIG_T::reuse_factor); + static const unsigned multiplier_limit = 1; // DIV_ROUNDUP(CONFIG_T::n_in*CONFIG_T::n_out, multfactor) + static const unsigned multiplier_factor = 1; // min n_in, rf + static const unsigned multiplier_scale = 1; // M_LIMIT/CONFIG_T::n_out; + static const unsigned reciprocal = 1; // 2^35 / 25 + static const unsigned rf_pad = 0; + static const unsigned bf_pad = 0; + // Resource reuse info + static const unsigned io_type = io_parallel; + static const bool store_weights_in_bram = false; + static const unsigned n_zeros = 0; + // partitioning arrays cyclically to go with roll factors? 
+ + // Default multiplication + template using product = nnet::product::mult; +}; + +template +void dense_rf_gt(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + assert((CONFIG_T::multiplier_limit % CONFIG_T::n_out == 0 || CONFIG_T::reuse_factor >= CONFIG_T::n_in) && + "The current Reuse Factor is not allowed"); + assert((CONFIG_T::reuse_factor > CONFIG_T::n_in) && "This function is correct only for RF > N_IN"); + //#pragma ii CONFIG_T::reuse_factor + [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::n_out]; +Load: + #pragma unroll + for (int iacc = 0; iacc < CONFIG_T::n_out; iacc++) { + acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc]; + } + [[intel::fpga_register]] int out_index[CONFIG_T::reuse_factor][CONFIG_T::block_factor]; + [[intel::fpga_register]] int d_index[CONFIG_T::reuse_factor][CONFIG_T::block_factor]; + + #pragma unroll + for (int ir = 0; ir < CONFIG_T::reuse_factor; ir++) { + #pragma unroll + for (int im = 0; im < CONFIG_T::block_factor; im++) { + uint32_t w_index = ir + CONFIG_T::reuse_factor * im; + out_index[ir][im] = (w_index / CONFIG_T::multiplier_factor); + d_index[ir][im] = w_index % CONFIG_T::n_in; + } + } +Product1: + [[intel::nofusion, intel::speculated_iterations(0)]] for (int ir = 0; ir < CONFIG_T::reuse_factor; ir++) { + [[intel::fpga_register]] typename CONFIG_T::accum_t tmp_acc[CONFIG_T::block_factor]; + Product2: + #pragma unroll + for (int im = 0; im < CONFIG_T::block_factor; im++) { + uint32_t w_index = ir + (CONFIG_T::reuse_factor_rounded)*im; + if (w_index >= CONFIG_T::reuse_factor_rounded * CONFIG_T::block_factor_rounded) + continue; + int data_index = d_index[ir][im]; + // Modified this + tmp_acc[im] = + CONFIG_T::template product::product( + data[data_index], weights[w_index]); + } + [[intel::fpga_register]] typename CONFIG_T::accum_t mult[CONFIG_T::multiplier_limit]; + ResetMult: + #pragma unroll + for (int imult = 0; imult < 
CONFIG_T::multiplier_limit; imult++) { + mult[imult] = 0; + } + AccumLoop1: + #pragma unroll + for (int im = 0; im < CONFIG_T::block_factor; im++) { + int o_index = out_index[ir][im]; + if (o_index >= CONFIG_T::n_out) + continue; // check out of bounds + mult[o_index] += tmp_acc[im]; + } + AccumLoop2: + #pragma unroll + for (int im = 0; im < CONFIG_T::multiplier_limit; im++) { + acc[im] += mult[im]; + } + } +Store: + #pragma unroll + for (int ires = 0; ires < CONFIG_T::n_out; ires++) { + res[ires] = cast(acc[ires]); // acc[jj]; + } +} +template +void dense_rf_lt(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + assert((CONFIG_T::multiplier_limit % CONFIG_T::n_out == 0 || CONFIG_T::reuse_factor >= CONFIG_T::n_in) && + "The current Reuse Factor is not allowed"); + assert((CONFIG_T::multiplier_limit == CONFIG_T::block_factor) && "This function is correct only for RF <= N_IN"); + + [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::n_out]; +InitAccum: + #pragma unroll + for (int iacc = 0; iacc < CONFIG_T::n_out; iacc++) { + acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc]; + } +ReuseLoop: + [[intel::nofusion, intel::speculated_iterations(0)]] for (int ir = 0; ir < CONFIG_T::reuse_factor; ir++) { + [[intel::fpga_register]] typename CONFIG_T::accum_t mult[CONFIG_T::block_factor]; + MultLoop: + #pragma unroll + for (int im = 0, in_index = ir; im < CONFIG_T::block_factor; im++) { + uint32_t w_index = ir + (CONFIG_T::reuse_factor_rounded)*im; + if (ir + CONFIG_T::reuse_factor * im >= CONFIG_T::n_in * CONFIG_T::n_out) + continue; + // Modified this + mult[im] = + CONFIG_T::template product::product( + data[in_index], weights[w_index]); + in_index += CONFIG_T::reuse_factor; + if (in_index >= CONFIG_T::n_in) + in_index = ir; + } + AccumLoop: + #pragma unroll + for (int im = 0, out_index = 0, acc_step = 0; im < CONFIG_T::block_factor; im++) { + acc[out_index] += mult[im]; + if (acc_step 
+ 1 >= CONFIG_T::multiplier_scale) { + acc_step = 0; + out_index++; + } else { + acc_step++; + } + } + } +// Cast to "res_t" type +Result: + #pragma unroll + for (int ires = 0; ires < CONFIG_T::n_out; ires++) { + res[ires] = cast(acc[ires]); + } +} +template +void dense_resource(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + if (CONFIG_T::reuse_factor <= CONFIG_T::n_in) { + dense_rf_lt(data, res, weights, biases); + } else { + dense_rf_gt(data, res, weights, biases); + } +} +} // namespace nnet +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense_stream.h new file mode 100644 index 0000000000..92c9adc3bb --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_dense_stream.h @@ -0,0 +1,23 @@ +#ifndef NNET_DENSE_STREAM_H_ +#define NNET_DENSE_STREAM_H_ + +#include "nnet_common.h" +#include "nnet_dense.h" +#include "nnet_types.h" + +namespace nnet { + +// Note: DataPack logic removed, at least in the initial version +template +void dense_resource_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::bias_t biases) { + + [[intel::fpga_register]] typename ExtractPipeType::value_type res; + [[intel::fpga_register]] auto data = data_pipe::read(); + dense_resource::value_type, typename ExtractPipeType::value_type, + CONFIG_T>(data, res, weights, biases); + res_pipe::write(res); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d.h new file mode 100644 index 0000000000..d2c774fcf8 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d.h @@ -0,0 +1,19 @@ +#ifndef NNET_DEPTH_CONV1D_H_ +#define NNET_DEPTH_CONV1D_H_ + +#include "nnet_common.h" +#include "nnet_conv1d.h" +#include 
"nnet_depthconv1d_resource.h" + +namespace nnet { + +template +void depthwise_conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + + depthwise_conv_1d_resource_cl(data, res, weights, biases); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d_resource.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d_resource.h new file mode 100644 index 0000000000..c06b6b14e7 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv1d_resource.h @@ -0,0 +1,60 @@ +#ifndef NNET_DEPTH_CONV1D_LATENCY_H_ +#define NNET_DEPTH_CONV1D_LATENCY_H_ + +#include "nnet_common.h" +#include "nnet_conv1d_resource.h" +#include "nnet_mult.h" + +namespace nnet { + +template +void depthwise_conv_1d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + + int depth_multiplier = CONFIG_T::n_filt / CONFIG_T::n_chan; + [[intel::fpga_register]] int res_idx = 0; + + [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::out_width * CONFIG_T::n_filt]; + +DM_LOOP: + #pragma unroll + for (int dm = 0; dm < depth_multiplier; dm++) { + + WIDTH_LOOP: + #pragma unroll + for (int w = 0; w < CONFIG_T::out_width; w++) { + + CHAN_LOOP: + #pragma unroll + for (int c = 0; c < CONFIG_T::n_chan; c++) { + + res_idx = (w * CONFIG_T::n_filt) + (c * depth_multiplier) + dm; + + acc[res_idx] = biases[c * depth_multiplier + dm]; + + KERNEL_W_LOOP: + #pragma unroll + for (int kw = 0; kw < CONFIG_T::filt_width; kw++) { + + int w_in = w * CONFIG_T::stride_width + kw - CONFIG_T::pad_left; + + if ((w_in >= 0) && (w_in < CONFIG_T::in_width)) { + + acc[res_idx] += CONFIG_T::mult_config:: + template product::product( + data[(w_in)*CONFIG_T::n_chan + c], + weights[(dm * CONFIG_T::filt_width * CONFIG_T::n_chan) + (kw * CONFIG_T::n_chan) + c]); + } + } + } 
+ } + } + +RESULT: + #pragma unroll + for (int ires = 0; ires < CONFIG_T::out_width * CONFIG_T::n_filt; ires++) { + res[ires] = cast(acc[ires]); + } +} +} // namespace nnet +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d.h new file mode 100644 index 0000000000..87dc1805d9 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d.h @@ -0,0 +1,19 @@ +#ifndef NNET_DEPTH_CONV2D_H_ +#define NNET_DEPTH_CONV2D_H_ + +#include "nnet_common.h" +#include "nnet_conv2d.h" +#include "nnet_depthconv2d_resource.h" + +namespace nnet { + +template +void depthwise_conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + + depthwise_conv_2d_resource_cl(data, res, weights, biases); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d_resource.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d_resource.h new file mode 100644 index 0000000000..91ddc28f65 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_depthconv2d_resource.h @@ -0,0 +1,76 @@ +#ifndef NNET_SEPARABLE_CONV2D_LATENCY_H_ +#define NNET_SEPARABLE_CONV2D_LATENCY_H_ + +#include "nnet_common.h" +#include "nnet_conv2d_resource.h" +#include "nnet_mult.h" + +namespace nnet { + +template +void depthwise_conv_2d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { + + int depth_multiplier = CONFIG_T::n_filt / CONFIG_T::n_chan; + [[intel::fpga_register]] int res_idx = 0; + + [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::out_width * CONFIG_T::out_height * CONFIG_T::n_filt]; + +DM_LOOP: + #pragma unroll + for (int dm = 0; dm < depth_multiplier; dm++) { + + HEIGHT_LOOP: + #pragma unroll + for (int h = 0; h < 
CONFIG_T::out_height; h++) { + WIDTH_LOOP: + #pragma unroll + for (int w = 0; w < CONFIG_T::out_width; w++) { + + CHAN_LOOP: + #pragma unroll + for (int c = 0; c < CONFIG_T::n_chan; c++) { + + res_idx = + (h * CONFIG_T::out_width * CONFIG_T::n_filt) + (w * CONFIG_T::n_filt) + (c * depth_multiplier) + dm; + + acc[res_idx] = biases[c * depth_multiplier + dm]; + + KERNEL_H_LOOP: + #pragma unroll + for (int kh = 0; kh < CONFIG_T::filt_height; kh++) { + KERNEL_W_LOOP: + #pragma unroll + for (int kw = 0; kw < CONFIG_T::filt_width; kw++) { + + int h_in = h * CONFIG_T::stride_height + kh - CONFIG_T::pad_top; + int w_in = w * CONFIG_T::stride_width + kw - CONFIG_T::pad_left; + + if ((h_in >= 0) && (h_in < CONFIG_T::in_height) && (w_in >= 0) && (w_in < CONFIG_T::in_width)) { + + acc[res_idx] += + CONFIG_T::mult_config::template product:: + product( + data[(h_in)*CONFIG_T::in_width * CONFIG_T::n_chan + (w_in)*CONFIG_T::n_chan + c], + weights[(dm * CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan) + + (kh * CONFIG_T::filt_width * CONFIG_T::n_chan) + + (kw * CONFIG_T::n_chan) + c]); + + ; + } + } + } + } + } + } + } + +RESULT: + #pragma unroll + for (int ires = 0; ires < CONFIG_T::out_width * CONFIG_T::out_height * CONFIG_T::n_filt; ires++) { + res[ires] = cast(acc[ires]); + } +} +} // namespace nnet +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed.h new file mode 100644 index 0000000000..1188fe3ecc --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed.h @@ -0,0 +1,43 @@ +#ifndef NNET_EMBED_H_ +#define NNET_EMBED_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" + +namespace nnet { + +struct embed_config { + // Internal data type definitions + typedef float embeddings_t; + + // (Default layer sizes, overwritten form the backend + static const unsigned n_in = 10; + static const unsigned n_out = 16; + static const unsigned 
vocab_size = 50; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; +}; + +template +void embedding(const data_T &data, res_T &res, const typename CONFIG_T::embeddings_t &embeddings) { + + /* + * Can store embeddings[] in a register, but a large multiiplexer + * is created due to a non-constant access pattern + */ + +InputSequence: + #pragma unroll + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < CONFIG_T::n_in; j++) { + DenseEmbedding: + #pragma unroll + for (int i = 0; i < CONFIG_T::n_out; i++) { + res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i]; + } + } +} + +} // namespace nnet +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed_stream.h new file mode 100644 index 0000000000..0f2acb098c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_embed_stream.h @@ -0,0 +1,31 @@ +#ifndef NNET_EMBED_STREAM_H_ +#define NNET_EMBED_STREAM_H_ + +namespace nnet { + +template +void embedding_stream(typename CONFIG_T::embeddings_t embeddings) { + + using res_T = typename ExtractPipeType::value_type; + constexpr auto datasize = std::tuple_size::value_type>{}; + + auto in_data = data_pipe::read(); + +InputSequence: + [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < datasize; j++) { + + res_T res_pack; + + DenseEmbedding: + #pragma unroll + for (int i = 0; i < CONFIG_T::n_out; i++) { + res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; + } + + res_pipe::write(res_pack); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_helpers.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_helpers.h new file mode 100644 index 0000000000..c7af2e7a68 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_helpers.h @@ 
-0,0 +1,118 @@ +#ifndef NNET_HELPERS_H +#define NNET_HELPERS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nnet { + +template void convert_data(sycl::queue &q, srcType *src) { + constexpr auto dstTypeSize = std::tuple_size::value_type>{}; + for (size_t i = 0; i < SIZE / dstTypeSize; i++) { + typename ExtractPipeType::value_type ctype; + for (size_t j = 0; j < dstTypeSize; j++) { + ctype[j] = src[i * dstTypeSize + j]; + } + dest_pipe::write(q, ctype); + } +} + +template void convert_data_back(sycl::queue &q, dstType *dst) { + constexpr auto srcTypeSize = std::tuple_size::value_type>{}; + for (size_t i = 0; i < SIZE / srcTypeSize; i++) { + auto ctype = src_pipe::read(q); + for (size_t j = 0; j < srcTypeSize; j++) { + dst[i * srcTypeSize + j] = ctype[j].to_double(); + } + } +} + +extern bool trace_enabled; +extern std::map *trace_outputs; +extern size_t trace_type_size; + +// constexpr int ceillog2(int x) { return (x <= 2) ? 1 : 1 + ceillog2((x + 1) / 2); } +// replace with template metaprogramming +template struct ceillog2 { + enum { val = 1 + ceillog2<((n + 1) / 2)>::val }; +}; + +template <> struct ceillog2<2> { + enum { val = 1 }; +}; + +template <> struct ceillog2<1> { + enum { val = 0 }; +}; + +// constexpr int floorlog2(int x) { return (x < 2) ? 0 : 1 + floorlog2(x / 2); } +// replace with template metaprogramming +template struct floorlog2 { + enum { val = 1 + floorlog2<(n / 2)>::val }; +}; + +template <> struct floorlog2<1> { + enum { val = 0 }; +}; + +template <> struct floorlog2<0> { + enum { val = 0 }; +}; + +// constexpr int pow2(int x) { return x == 0 ? 
1 : 2 * pow2(x - 1); } +// replace with template metaprogramming +template struct pow2 { + enum { val = 2 * pow2<(n - 1)>::val }; +}; + +template <> struct pow2<0> { + enum { val = 1 }; +}; + +template void save_output_array(data_T *data, save_T *ptr, size_t layer_size) { + for (size_t i = 0; i < layer_size; i++) { // size_t index: same type as layer_size, no signed/unsigned comparison + ptr[i] = static_cast(data[i].to_double()); + } +} + +// We don't want to include save_T in this function because it will be inserted into myproject.cpp +// so a workaround with element size is used +template void save_layer_output(data_T *data, const char *layer_name, size_t layer_size) { + if (!trace_enabled) + return; + + if (trace_outputs) { + if (trace_outputs->count(layer_name) > 0) { + if (trace_type_size == 4) { + save_output_array(data, (float *)(*trace_outputs)[layer_name], layer_size); + } else if (trace_type_size == 8) { + save_output_array(data, (double *)(*trace_outputs)[layer_name], layer_size); + } else { + std::cout << "Unknown trace type!" << std::endl; + } + } else { + std::cout << "Layer name: " << layer_name << " not found in debug storage!"
<< std::endl; + } + } else { + std::ostringstream filename; + filename << "./tb_data/" << layer_name << "_output.log"; // TODO if run as a shared lib, path should be ../tb_data + std::fstream out; + out.open(filename.str(), std::ios::app); + assert(out.is_open()); + for (size_t i = 0; i < layer_size; i++) { // size_t index: same type as layer_size, no signed/unsigned comparison + out << data[i] << " "; // We don't care about precision in text files + } + out << std::endl; + out.close(); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge.h new file mode 100644 index 0000000000..d1262f4377 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge.h @@ -0,0 +1,233 @@ +#ifndef NNET_MERGE_H_ +#define NNET_MERGE_H_ + +#include "nnet_mult.h" + +namespace nnet { + +struct merge_config { + static const unsigned n_elem = 10; + static const unsigned reuse_factor = 1; +}; + +struct dot_config { + static const unsigned n_in = 10; + static const unsigned n_out = 1; + + static const unsigned reuse_factor = 1; + + typedef float accum_t; + + template using product = nnet::product::mult; +}; + +struct concat_config { + static const unsigned n_elem1_0 = 10; + static const unsigned n_elem1_1 = 10; + static const unsigned n_elem1_2 = 10; + static const unsigned n_elem2_0 = 10; + static const unsigned n_elem2_1 = 10; + static const unsigned n_elem2_2 = 10; + + static const unsigned axis = -1; +}; + +template +void add(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast(data1[i] + data2[i]); + } +} + +template +void subtract(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast(data1[i] - data2[i]); + } +} + +template +void multiply(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i =
0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast(data1[i] * data2[i]); + } +} + +template +void average(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast((data1[i] + data2[i]) / 2); + } +} + +template +void maximum(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast((data1[i] > data2[i]) ? data1[i] : data2[i]); + } +} + +template +void minimum(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem; i++) { + res[i] = static_cast((data1[i] < data2[i]) ? data1[i] : data2[i]); + } +} + +template +void dot1d(const input1_T &data1, const input2_T &data2, res_T &res) { + constexpr unsigned multiplier_limit = DIV_ROUNDUP(CONFIG_T::n_in, CONFIG_T::reuse_factor); + + [[intel::fpga_register]] typename CONFIG_T::accum_t mult[CONFIG_T::n_in]; +Product: + #pragma unroll multiplier_limit + for (int i = 0; i < CONFIG_T::n_in; i++) { + mult[i] = CONFIG_T::template product::product( + data1[i], data2[i]); + } + + [[intel::fpga_register]] typename CONFIG_T::accum_t acc = 0; +Accum: + #pragma unroll + for (int i = 0; i < CONFIG_T::n_in; i++) { + acc += mult[i]; + } + + res[0] = static_cast(acc); +} + +template +void concatenate1d(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + res[i] = static_cast(data1[i]); + } + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem2_0; i++) { + res[CONFIG_T::n_elem1_0 + i] = static_cast(data2[i]); + } +} + +template +void concatenate2d_0(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1; i++) { + res[i] = static_cast(data1[i]); + } + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1; i++) { + 
res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + i] = static_cast(data2[i]); + } +} + +template +void concatenate2d_1(const input1_T &data1, const input2_T &data2, res_T &res) { + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + #pragma unroll + for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + res[i * (CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_1) + j] = + static_cast(data1[i * CONFIG_T::n_elem1_1 + j]); + } + + #pragma unroll + for (int j = 0; j < CONFIG_T::n_elem2_1; j++) { + res[i * (CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_1) + CONFIG_T::n_elem1_1 + j] = + static_cast(data2[i * CONFIG_T::n_elem2_1 + j]); + } + } +} + +template +void concatenate2d(const input1_T &data1, const input2_T &data2, res_T &res) { + if (CONFIG_T::axis == 2 || CONFIG_T::axis == -1) { + concatenate2d_1(data1, data2, res); + } else { + concatenate2d_0(data1, data2, res); + } +} + +template +void concatenate3d_0(const input1_T &data1, const input2_T &data2, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2; i++) { + res[i] = static_cast(data1[i]); + } + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2; i++) { + res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + i] = + static_cast(data2[i]); + } +} + +template +void concatenate3d_1(const input1_T &data1, const input2_T &data2, res_T &res) { + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_elem1_2; k++) { + int res_idx = + i * (CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_1) * CONFIG_T::n_elem1_2 + j * CONFIG_T::n_elem1_2 + k; + int data_idx = i * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + j * CONFIG_T::n_elem1_2 + k; + res[res_idx] = static_cast(data1[data_idx]); + } + } + + for (int j = 0; j < CONFIG_T::n_elem2_1; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_elem2_2; k++) { + int res_idx = i * 
(CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_1) * CONFIG_T::n_elem1_2 + + (j + CONFIG_T::n_elem1_1) * CONFIG_T::n_elem1_2 + k; + int data_idx = i * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2 + j * CONFIG_T::n_elem2_2 + k; + res[res_idx] = static_cast(data2[data_idx]); + } + } + } +} + +template +void concatenate3d_2(const input1_T &data1, const input2_T &data2, res_T &res) { + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + + #pragma unroll + for (int k = 0; k < CONFIG_T::n_elem1_2; k++) { + int res_idx = i * CONFIG_T::n_elem1_1 * (CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_2) + + j * (CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_2) + k; + int data_idx = i * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + j * CONFIG_T::n_elem1_2 + k; + res[res_idx] = static_cast(data1[data_idx]); + } + + #pragma unroll + for (int k = 0; k < CONFIG_T::n_elem2_2; k++) { /* data2 supplies n_elem2_2 entries per row of the last axis, matching the n_elem2_2 stride in data_idx */ + int res_idx = i * CONFIG_T::n_elem1_1 * (CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_2) + + j * (CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_2) + k + CONFIG_T::n_elem1_2; + int data_idx = i * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2 + j * CONFIG_T::n_elem2_2 + k; + res[res_idx] = static_cast(data2[data_idx]); + } + } + } +} + +template +void concatenate3d(const input1_T &data1, const input2_T &data2, res_T &res) { + if (CONFIG_T::axis == 3 || CONFIG_T::axis == -1) { + concatenate3d_2(data1, data2, res); + } else if (CONFIG_T::axis == 2 || CONFIG_T::axis == -2) { + concatenate3d_1(data1, data2, res); + } else { + concatenate3d_0(data1, data2, res); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge_stream.h new file mode 100644 index 0000000000..60028ea52e --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_merge_stream.h @@ -0,0 +1,359 @@ +#ifndef NNET_MERGE_STREAM_H_ +#define NNET_MERGE_STREAM_H_ + +namespace nnet { + 
+template void add_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +AddLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + AddPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = static_cast::value_type::value_type>(in_data1[j] + in_data2[j]); + } + + res_pipe::write(out_data); + } +} + +template void subtract_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +SubtractLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + SubtractPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = static_cast::value_type::value_type>(in_data1[j] - in_data2[j]); + } + + res_pipe::write(out_data); + } +} + +template void multiply_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +MultLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + MultPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = 
static_cast::value_type::value_type>(in_data1[j] * in_data2[j]); + } + + res_pipe::write(out_data); + } +} + +template void average_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +AvgLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + AvgPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = static_cast::value_type::value_type>( + (in_data1[j] + in_data2[j]) / (typename ExtractPipeType::value_type::value_type)2); + } + + res_pipe::write(out_data); + } +} + +template void maximum_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +MaxLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + MaxPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = static_cast::value_type::value_type>( + (in_data1[j] > in_data2[j]) ? 
in_data1[j] : in_data2[j]); + } + + res_pipe::write(out_data); + } +} + +template void minimum_stream() { + // both inputs are the same size + constexpr auto inputSize = std::tuple_size::value_type>{}; + constexpr auto outputSize = std::tuple_size::value_type>{}; + +MinLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem / inputSize; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + MinPack: + #pragma unroll + for (int j = 0; j < outputSize; j++) { + out_data[j] = static_cast::value_type::value_type>( + (in_data1[j] < in_data2[j]) ? in_data1[j] : in_data2[j]); + } + + res_pipe::write(out_data); + } +} + +template void concatenate1d_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = std::tuple_size::value_type>{}; + + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + +ConcatLoop1: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem1_0 / input1Size; i++) { /* each input1 read delivers input1Size elements, so the read count is n_elem1_0 / input1Size */ + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + ConcatPack1: + #pragma unroll + for (int j = 0; j < input1Size; j++) { + out_data[j + (i * input1Size)] = + static_cast::value_type::value_type>(in_data1[j]); + } + } + +ConcatLoop2: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem2_0 / input2Size; i++) { + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + ConcatPack2: + #pragma unroll + for (int j = 0; j < input2Size; j++) { + out_data[j + (i * input2Size) + (CONFIG_T::n_elem1_0)] = + static_cast::value_type::value_type>(in_data2[j]); + } + } + res_pipe::write(out_data); +} + +template void concatenate2d_0_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = std::tuple_size::value_type>{}; + +ConcatLoopHeight1: + 
[[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput1: + #pragma unroll + for (int k = 0; k < input1Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data1[k]); + } + + res_pipe::write(out_data); + } + +ConcatLoopHeight2: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem2_0; i++) { + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput2: + #pragma unroll + for (int k = 0; k < input2Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data2[k]); + } + + res_pipe::write(out_data); + } +} + +template void concatenate2d_1_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = std::tuple_size::value_type>{}; + +ConcatLoopHeight: + [[intel::initiation_interval(1)]] for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput1: + #pragma unroll + for (int k = 0; k < input1Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data1[k]); + } + + ConcatPackInput2: + #pragma unroll + for (int k = 0; k < input2Size; k++) { + out_data[input1Size + k] = static_cast::value_type::value_type>(in_data2[k]); + } + + res_pipe::write(out_data); + } +} + +template void concatenate2d_stream() { + if (CONFIG_T::axis == 2 || CONFIG_T::axis == -1) { + concatenate2d_1_stream(); + } else { + concatenate2d_0_stream(); + } +} + +template void concatenate3d_0_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = 
std::tuple_size::value_type>{}; + +ConcatLoopHeight1: + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + ConcatLoopWidth1: + [[intel::initiation_interval(1)]] for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + ConcatPackInput1: + #pragma unroll + for (int k = 0; k < input1Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data1[k]); + } + + res_pipe::write(out_data); + } + } + +ConcatLoopHeight2: + for (int i = 0; i < CONFIG_T::n_elem2_0; i++) { + ConcatLoopWidth2: + [[intel::initiation_interval(1)]] for (int j = 0; j < CONFIG_T::n_elem2_1; j++) { + + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput2: + #pragma unroll + for (int k = 0; k < input2Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data2[k]); + } + + res_pipe::write(out_data); + } + } +} + +template void concatenate3d_1_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = std::tuple_size::value_type>{}; + +ConcatLoopHeight: + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + ConcatLoopWidth1: + [[intel::initiation_interval(1)]] for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput1: + #pragma unroll + for (int k = 0; k < input1Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data1[k]); + } + + res_pipe::write(out_data); + } + ConcatLoopWidth2: + [[intel::initiation_interval(1)]] for (int j = 0; j < CONFIG_T::n_elem2_1; j++) { + + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput2: + #pragma unroll + 
for (int k = 0; k < input2Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data2[k]); + } + + res_pipe::write(out_data); + } + } +} + +template void concatenate3d_2_stream() { + constexpr auto input1Size = std::tuple_size::value_type>{}; + constexpr auto input2Size = std::tuple_size::value_type>{}; + +ConcatLoopHeight: + for (int i = 0; i < CONFIG_T::n_elem1_0; i++) { + ConcatLoopWidth: + [[intel::initiation_interval(1)]] for (int j = 0; j < CONFIG_T::n_elem1_1; j++) { + + [[intel::fpga_register]] auto in_data1 = input1_pipe::read(); + [[intel::fpga_register]] auto in_data2 = input2_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type out_data; + + ConcatPackInput1: + #pragma unroll + for (int k = 0; k < input1Size; k++) { + out_data[k] = static_cast::value_type::value_type>(in_data1[k]); + } + + ConcatPackInput2: + #pragma unroll + for (int k = 0; k < input2Size; k++) { + out_data[input1Size + k] = + static_cast::value_type::value_type>(in_data2[k]); + } + + res_pipe::write(out_data); + } + } +} + +template void concatenate3d_stream() { + if (CONFIG_T::axis == 3 || CONFIG_T::axis == -1) { + concatenate3d_2_stream(); + } else if (CONFIG_T::axis == 2 || CONFIG_T::axis == -2) { + concatenate3d_1_stream(); + } else { + concatenate3d_0_stream(); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_mult.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_mult.h new file mode 100644 index 0000000000..c7dfc2d7c5 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_mult.h @@ -0,0 +1,113 @@ +#ifndef NNET_MULT_H_ +#define NNET_MULT_H_ + +#include "nnet_common.h" +#include "nnet_helpers.h" +#include + +namespace nnet { + +// Different methods to perform the product of input and weight, depending on their types. 
+namespace product { + +class Product { + public: + static void limit(unsigned multiplier_limit) {} +}; + +template class both_binary : public Product { + public: + inline static x_T product(x_T a, w_T w) { + // specialisation for 1-bit weights and incoming data + return a == w; + } +}; + +template class weight_binary : public Product { + public: + inline static auto product(x_T a, w_T w) -> decltype(-a) { + // Specialisation for 1-bit weights, arbitrary data + if (w == 0) + return -a; + else + return a; + } +}; + +template class data_binary : public Product { + public: + inline static auto product(x_T a, w_T w) -> decltype(-w) { + // Specialisation for 1-bit data, arbitrary weight + if (a == 0) + return -w; + else + return w; + } +}; + +template class weight_ternary : public Product { + public: + inline static auto product(x_T a, w_T w) -> decltype(-a) { + // Specialisation for 2-bit weights, arbitrary data + if (w == 0) + return 0; + else if (w == -1) + return -a; + else + return a; // if(w == 1) + } +}; + +template class mult : public Product { + public: + inline static auto product(x_T a, w_T w) -> decltype(a * w) { + // 'Normal' product + return a * w; + } + static void limit(unsigned multiplier_limit) { + // TODO: Implement for Quartus + // #pragma HLS ALLOCATION instances=mul limit=multiplier_limit operation > Vivado-only, replace with Intel HLS + // pragma + } +}; + +template class weight_exponential : public Product { + public: + using r_T = ac_fixed<2 * (w_T::second_type::width + x_T::width), (w_T::second_type::width + x_T::width), true>; + inline static r_T product(x_T a, w_T w) { + // Shift product for exponential weights + // Shift by the exponent. Negative weights shift right + r_T y = static_cast(a) << w.second; + + // Negate or not depending on weight sign + return w.first == 1 ? 
y : static_cast(-y); + } +}; +} // namespace product + +// TO-DO: These may need extra variants if ac_int types are used in more places +template +inline typename std::enable_if>::value && + std::is_same>::value, + ac_int>::type +cast(typename CONFIG_T::accum_t x) { + return static_cast>(((x - CONFIG_T::n_in / 2) * 2).to_ac_int()); +} + +template +inline typename std::enable_if>::value && + !std::is_same>::value, + res_T>::type +cast(typename CONFIG_T::accum_t x) { + return static_cast(x); +} + +template +inline typename std::enable_if<(!std::is_same>::value), res_T>::type +cast(typename CONFIG_T::accum_t x) { + return static_cast(x); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding.h new file mode 100644 index 0000000000..e8e3d6509b --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding.h @@ -0,0 +1,104 @@ +#ifndef NNET_PADDING_H_ +#define NNET_PADDING_H_ + +namespace nnet { + +struct padding1d_config { + static const unsigned in_width = 10; + static const unsigned out_width = 10; + static const unsigned n_chan = 10; + + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; +}; + +template void zeropad1d_cl(const data_T &data, res_T &res) { + + auto resIter = res.begin(); + auto dataIter = data.cbegin(); + + for (int i = 0; i < CONFIG_T::pad_left; i++) { + #pragma unroll + for (int j = 0; j < CONFIG_T::n_chan; j++) { + *(resIter++) = 0; + } + } + + for (int i = 0; i < CONFIG_T::in_width; i++) { + #pragma unroll + for (int j = 0; j < CONFIG_T::n_chan; j++) { + *(resIter++) = static_cast(*(dataIter++)); + } + } + + for (int i = 0; i < CONFIG_T::pad_right; i++) { + #pragma unroll + for (int j = 0; j < CONFIG_T::n_chan; j++) { + *(resIter++) = 0; + } + } +} + +struct padding2d_config { + static const unsigned in_height = 10; + static const unsigned in_width = 10; + + static const 
unsigned out_height = 10; + static const unsigned out_width = 10; + + static const unsigned n_chan = 10; + + static const unsigned pad_top = 0; + static const unsigned pad_bottom = 0; + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; +}; + +template void zeropad2d_cl(const data_T &data, res_T &res) { + + auto resIter = res.begin(); + auto dataIter = data.cbegin(); + + for (int i = 0; i < CONFIG_T::pad_top; i++) { + for (int j = 0; j < CONFIG_T::out_width; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + *(resIter++) = 0; + } + } + } + + for (int i = 0; i < CONFIG_T::in_height; i++) { + for (int j = 0; j < CONFIG_T::pad_left; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + *(resIter++) = 0; + } + } + for (int j = 0; j < CONFIG_T::in_width; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + *(resIter++) = static_cast(*(dataIter++)); + } + } + for (int j = 0; j < CONFIG_T::pad_right; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + *(resIter++) = 0; + } + } + } + + for (int i = 0; i < CONFIG_T::pad_bottom; i++) { + for (int j = 0; j < CONFIG_T::out_width; j++) { + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + *(resIter++) = 0; + } + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding_stream.h new file mode 100644 index 0000000000..adb2efee29 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_padding_stream.h @@ -0,0 +1,81 @@ +#ifndef NNET_PADDING_STREAM_H_ +#define NNET_PADDING_STREAM_H_ + +namespace nnet { + +template inline void fill_zero() { + [[intel::fpga_register]] typename ExtractPipeType::value_type res_part; + #pragma unroll + for (int i = 0; i < CONFIG_T::n_chan; i++) { + res_part[i] = 0; + } + res_pipe::write(res_part); +} + +template inline void 
fill_data() { + [[intel::fpga_register]] auto data_part = data_pipe::read(); + [[intel::fpga_register]] typename ExtractPipeType::value_type res_part; + #pragma unroll + for (int i = 0; i < CONFIG_T::n_chan; i++) { + res_part[i] = data_part[i]; + } + res_pipe::write(res_part); +} + +template void zeropad1d_cl_stream() { +PadLeft: + for (int i = 0; i < CONFIG_T::pad_left; i++) { + fill_zero(); + } + +CopyMain: + for (int i = 0; i < CONFIG_T::in_width; i++) { + fill_data(); + } + +PadRight: + for (int i = 0; i < CONFIG_T::pad_right; i++) { + fill_zero(); + } +} + +template void zeropad2d_cl_stream() { +PadTop: + [[intel::loop_coalesce(2)]] for (int i = 0; i < CONFIG_T::pad_top; i++) { + PadTopWidth: + for (int j = 0; j < CONFIG_T::out_width; j++) { + fill_zero(); + } + } + +PadMain: + [[intel::loop_coalesce(2)]] for (int i = 0; i < CONFIG_T::in_height; i++) { + + PadLeft: + for (int j = 0; j < CONFIG_T::pad_left; j++) { + fill_zero(); + } + + CopyMain: + for (int j = 0; j < CONFIG_T::in_width; j++) { + fill_data(); + } + + PadRight: + for (int j = 0; j < CONFIG_T::pad_right; j++) { + fill_zero(); + } + } + +PadBottom: + for (int i = 0; i < CONFIG_T::pad_bottom; i++) { + PadBottomWidth: + for (int j = 0; j < CONFIG_T::out_width; j++) { + fill_zero(); + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling.h new file mode 100644 index 0000000000..d4ae915335 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling.h @@ -0,0 +1,257 @@ +#ifndef NNET_POOLING_H_ +#define NNET_POOLING_H_ + +#include "nnet_common.h" + +namespace nnet { + +// Returns the maximum value from an array of size N +template accum_t max(T x[N]) { + [[intel::fpga_register]] T y = x[0]; + + // Due to loop dependencies, pipelining & unrolling is not possible + // Explicitly disabling pipeline significantly reduces resource usage + 
[[intel::disable_loop_pipelining]] for (int i = 1; i < N; i++) { + if (x[i] > y) + y = x[i]; + } + + return y; +} + +// Returns the mean value of an array of size N +template accum_t avg(T x[N], unsigned length) { + [[intel::fpga_register]] accum_t y = 0; + + // Due to loop dependencies, pipelining & unrolling is not possible + // Explicitly disabling pipeline significantly reduces resource usage + [[intel::disable_loop_pipelining]] for (int i = 0; i < N; i++) { y += x[i]; } + + y /= length; + return y; +} + +// Enumeration for pooling functions +enum Pool_Op { Max, Average }; +template accum_t pool_op(T x[N], unsigned length) { + switch (op) { + case Max: + return max(x); + case Average: + return avg(x, length); + } +} + +template accum_t pool_op(T (&x)[N]) { + return pool_op(x, N); +} + +/* + * In Tensorflow, pooling ignores the value in the padded cells + * For Avg pooling, return 0 (the divisor is modified to the area overlapping the unpadded image.) + * For Max pooling, return the most negative value for the type. 
+ */ +template inline T pad_val() { + switch (op) { + case Max: { + T x = 0; + x[x.width - 1] = 1; + return x; + } + case Average: + return 0; + } +} + +struct pooling1d_config { + // Pooling parameters + static const unsigned pool_width = 2; + static const unsigned stride_width = 2; + + // I/O sizes + static const unsigned n_in = 10; + static const unsigned n_out = (n_in - pool_width) / stride_width + 1; + static const unsigned n_filt = 4; + + // Padding + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; + static const bool count_pad = false; + + // Pooling function + static const Pool_Op pool_op = Max; +}; + +template void pooling1d_cl(const data_T &data, res_T &res) { + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; + +FiltLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { + InputWidthLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < restricted_padded_width; + inp_col += CONFIG_T::stride_width) { + [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::pool_width]; + + // Keep track of number of pixels in image vs padding region; needed for rescaling Average Pooling + [[intel::fpga_register]] unsigned img_overlap = 0; + + PoolWidthLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int pool_col = 0; pool_col < CONFIG_T::stride_width; pool_col++) { + if (inp_col + pool_col < CONFIG_T::pad_left || + inp_col + pool_col >= (full_padded_width - CONFIG_T::pad_right)) { + // Add padding + pool[pool_col] = pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; + } else { + // Current element is from input image + pool[pool_col] = data[(inp_col + pool_col - 
CONFIG_T::pad_left) * CONFIG_T::n_filt + filt]; + img_overlap++; + } + } + + // Pooling operation + res[(inp_col / CONFIG_T::stride_width) * CONFIG_T::n_filt + filt] = static_cast( + pool_op( + pool, img_overlap)); + } + } +} + +template void global_pooling1d_cl(const data_T &data, res_T &res) { + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + assert(CONFIG_T::pool_width == CONFIG_T::stride_width); + +FiltLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { + [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::n_in]; + + InputWidthLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int col = 0; col < CONFIG_T::n_in; col++) { + pool[col] = data[col * CONFIG_T::n_filt + filt]; + } + + res[filt] = static_cast( + pool_op(pool)); + } +} + +struct pooling2d_config { + // Pooling parameters + static const unsigned stride_height = 2; + static const unsigned stride_width = 2; + static const unsigned pool_height = 2; + static const unsigned pool_width = 2; + + // I/O sizes + static const unsigned in_height = 10; + static const unsigned in_width = 10; + static const unsigned n_filt = 4; + + static const unsigned out_height = (in_height - pool_height) / stride_height + 1; + static const unsigned out_width = (in_width - pool_width) / stride_width + 1; + + // Padding + static const unsigned pad_top = 0; + static const unsigned pad_bottom = 0; + static const unsigned pad_left = 0; + static const unsigned pad_right = 0; + static const bool count_pad = false; + + // Pooling function + static const Pool_Op pool_op = Max; +}; + +template void pooling2d_cl(const data_T &data, res_T &res) { + // Add padding and reduce input width to area covered by pooling function + static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right; + static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom; + 
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width; + static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height; + +FiltLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { + InputHeightLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int inp_col = 0; inp_col < restricted_padded_height; + inp_col += CONFIG_T::stride_height) { + InputWidthLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int inp_width = 0; inp_width < restricted_padded_width; + inp_width += CONFIG_T::stride_width) { + [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::pool_height * CONFIG_T::pool_width]; + + // Keep track of number of pixels in image vs padding region; needed for rescaling Average Pooling + [[intel::fpga_register]] unsigned img_overlap = 0; + + PoolHeightLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int pool_col = 0; pool_col < CONFIG_T::stride_height; pool_col++) { + PoolWidthLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int pool_row = 0; pool_row < CONFIG_T::stride_width; + pool_row++) { + if (inp_col + pool_col < CONFIG_T::pad_top || + inp_col + pool_col >= (full_padded_height - CONFIG_T::pad_bottom) || + inp_width + pool_row < CONFIG_T::pad_left || + inp_width + pool_row >= (full_padded_width - CONFIG_T::pad_right)) { + // Add padding + pool[pool_col * CONFIG_T::stride_width + pool_row] = + pad_val(); + if (CONFIG_T::count_pad) + img_overlap++; + } else { + // Current element is from input image + pool[pool_col * CONFIG_T::stride_width + pool_row] = + data[(inp_col + pool_col - CONFIG_T::pad_top) * CONFIG_T::in_width * CONFIG_T::n_filt + + (inp_width + pool_row - CONFIG_T::pad_left) * CONFIG_T::n_filt + filt]; + img_overlap++; + } + } + } + + // Pooling operation + res[(inp_col / CONFIG_T::stride_height) * 
CONFIG_T::out_width * CONFIG_T::n_filt + + (inp_width / CONFIG_T::stride_width) * CONFIG_T::n_filt + filt] = + static_cast( + pool_op(pool, img_overlap)); + } + } + } +} + +template void global_pooling2d_cl(const data_T &data, res_T &res) { + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0); + assert(CONFIG_T::pool_width == CONFIG_T::stride_width); + assert(CONFIG_T::pool_height == CONFIG_T::stride_height); + +FiltLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int filt = 0; filt < CONFIG_T::n_filt; filt++) { + [[intel::fpga_register]] typename data_T::value_type pool[CONFIG_T::in_height * CONFIG_T::in_width]; + + InputLoop: + #pragma unroll + [[intel::disable_loop_pipelining]] for (int i = 0; i < CONFIG_T::in_height * CONFIG_T::in_width; i++) { + pool[i] = data[i * CONFIG_T::n_filt + filt]; + } + + res[filt] = static_cast( + pool_op(pool)); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling_stream.h new file mode 100644 index 0000000000..9c30aab67d --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_pooling_stream.h @@ -0,0 +1,322 @@ +#ifndef NNET_POOLING_STREAM_H_ +#define NNET_POOLING_STREAM_H_ + +#include "nnet_conv1d_stream.h" +#include "nnet_conv2d_stream.h" +#include "nnet_pooling.h" +#include "nnet_types.h" + +namespace nnet { + +/* + * void compute_pool_buffer_1d(in_element, res_stream, line_buffer, kernel_window) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels res_stream - output stream, passed by reference to allow direct writing line_buffer - chained array of shift + * registers, one for each row of the pool and channel kernel_window - array of values from the input curently being pooled + * + * 
Function executes 4 steps: + * (1) Shift line buffer - updates the contents of the chained shift registers, inserting the new inputs and removing last + * elements (2) Kernel shift - updates the elements of the kernel window, by storing the new inputs and popped elements from + * the line buffer (3) Pooling - performs dense matrix multiplication between the current input window and kernel weights (4) + * Counter housekeeping - performs the required pooling operation + * + */ +template +void compute_pool_buffer_1d(const data_T &in_elem, + nnet::shift_reg line_buffer[CONFIG_T::n_filt], + data_window_T &kernel_window, int &pX, int &sX) { + + using res_T = typename ExtractPipeType::value_type; + + // Thresholds + constexpr int lShiftX = CONFIG_T::pool_width - 1; + + // Step 1 - Shift line buffer + [[intel::fpga_register]] typename data_T::value_type shift_buffer[CONFIG_T::n_filt]; + nnet::shift_line_buffer_1d(in_elem, line_buffer, shift_buffer); + + // Step 2 - Kernel shift + nnet::kernel_shift_1d(shift_buffer, kernel_window); + + // Check to see if we have a full pool window + if ((sX - lShiftX) == 0 && pX > (lShiftX - 1)) { + [[intel::fpga_register]] res_T res_pack; + + FiltLoop: + #pragma unroll + for (int filter = 0; filter < CONFIG_T::n_filt; filter++) { + [[intel::fpga_register]] typename data_T::value_type pool_window[CONFIG_T::pool_width]; + + // Retrieve data for current channel + PoolLoop: + #pragma unroll + for (int i = 0; i < CONFIG_T::pool_width; i++) { + pool_window[i] = kernel_window[i * CONFIG_T::n_filt + filter]; + } + + // Step 3 - Pooling + res_pack[filter] = static_cast( + pool_op( + pool_window)); + } + + // Write result to output stream + res_pipe::write(res_pack); + } + + // Reached end of image + if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right)) { + pX = 0; + sX = 0; + // Move to the right + } else { + pX++; + sX = ((sX - lShiftX) == 0) ? 
(sX - CONFIG_T::stride_width + 1) : (sX + 1); + } +} + +template void pooling1d_cl_stream() { + assert(CONFIG_T::pool_width == CONFIG_T::stride_width); + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + + using data_arr_T = typename ExtractPipeType::value_type; + using data_element_T = typename data_arr_T::value_type; + using data_window_T = array; + + // Line buffer and kernel window + [[intel::fpga_register]] nnet::shift_reg line_buffer[CONFIG_T::n_filt]; + [[intel::fpga_register]] data_window_T kernel_window; + + // move former static variables outside the function calls + // X position pixel + int pX = 0; + // X strides + int sX = 0; + +// Read input image +ReadInputWidth: + for (int col = 0; col < CONFIG_T::in_width; col++) { + compute_pool_buffer_1d(data_pipe::read(), line_buffer, kernel_window, + pX, sX); + } +} + +/* + * void compute_pool_buffer_2d(in_element, res_stream, line_buffer, kernel_window) + * + * Args: + * in_element - current elements from input image, data_T type is usually nnet::array, size of array corresponds to number + * of channels res_stream - output stream, passed by reference to allow direct writing line_buffer - chained array of shift + * registers, one for each row of the pool and channel kernel_window - array of values from the input curently being pooled + * + * Function executes 4 steps: + * (1) Shift line buffer - updates the contents of the chained shift registers, inserting the new inputs and removing last + * elements (2) Kernel shift - updates the elements of the kernel window, by storing the new inputs and popped elements from + * the line buffer (3) Pooling - performs dense matrix multiplication between the current input window and kernel weights (4) + * Counter housekeeping - performs the required pooling operation + * + */ +template +void compute_pool_buffer_2d(const data_T &in_elem, + nnet::shift_reg + line_buffer[CONFIG_T::pool_height - 1][CONFIG_T::n_filt], + data_window_T &kernel_window, int &pX, int 
&pY, int &sX, int &sY) { + + using res_T = typename ExtractPipeType::value_type; + + // Thresholds + static constexpr int lShiftX = CONFIG_T::pool_width - 1; + static constexpr int lShiftY = CONFIG_T::pool_height - 1; + + // Step 1 - Shift line buffer + [[intel::fpga_register]] typename data_T::value_type shift_buffer[CONFIG_T::pool_height][CONFIG_T::n_filt]; + nnet::shift_line_buffer_2d(in_elem, line_buffer, shift_buffer); + + // Step 2 - Kernel shift + nnet::kernel_shift_2d(shift_buffer, kernel_window); + + // Check to see if we have a full pool window + if ((sX - lShiftX) == 0 && (sY - lShiftY) == 0 && pY > (lShiftY - 1) && pX > (lShiftX - 1)) { + [[intel::fpga_register]] res_T res_pack; + + FiltLoop: + #pragma unroll + for (int filter = 0; filter < CONFIG_T::n_filt; filter++) { + [[intel::fpga_register]] typename data_T::value_type pool_window[CONFIG_T::pool_height * CONFIG_T::pool_width]; + + // Retrieve data for current channel + PoolLoop: + #pragma unroll + for (int i = 0; i < CONFIG_T::pool_height * CONFIG_T::pool_width; i++) { + pool_window[i] = kernel_window[i * CONFIG_T::n_filt + filter]; + } + + // Step 3 - Pooling + res_pack[filter] = static_cast( + pool_op(pool_window)); + } + + // Write result to output stream + res_pipe::write(res_pack); + } + + // Reached end of image + if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right) && + (pY + 1) == (CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom)) { + pX = 0; + sX = 0; + pY = 0; + sY = 0; + // Reached end of row + } else if ((pX + 1) == (CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right)) { + pX = 0; + sX = 0; + pY++; + sY = ((sY - lShiftY) == 0) ? (sY - CONFIG_T::stride_height + 1) : (sY + 1); + // Same row, same colum, therefore, move to the right + } else { + pX++; + sX = ((sX - lShiftX) == 0) ? 
(sX - CONFIG_T::stride_width + 1) : (sX + 1); + } +} + +template void pooling2d_cl_stream() { + assert(CONFIG_T::pool_height == CONFIG_T::stride_height && CONFIG_T::pool_width == CONFIG_T::stride_width); + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0); + + using data_arr_T = typename ExtractPipeType::value_type; + using data_element_T = typename data_arr_T::value_type; + using data_window_T = array; + + // Line buffer and kernel window + [[intel::fpga_register]] nnet::shift_reg + line_buffer[MAX(CONFIG_T::pool_height - 1, 1)][CONFIG_T::n_filt]; + [[intel::fpga_register]] data_window_T kernel_window; + + // former static variables + // X, Y position pixels + int pX = 0; + int pY = 0; + + // X, Y strides + int sX = 0; + int sY = 0; + +ReadInputHeight: + [[intel::loop_coalesce(2)]] for (int row = 0; row < CONFIG_T::in_height; row++) { + // Read input image + ReadInputWidth: + for (int col = 0; col < CONFIG_T::in_width; col++) { + compute_pool_buffer_2d(data_pipe::read(), line_buffer, + kernel_window, pX, pY, sX, sY); + } + } +} + +/* + * A function used with Global Pooling + * Updates the output pooling value + * Max : Return the maximum between the previous maximum and current input + * Avg : Returns the cumulative sum + */ +template inline T_y reduce_global_pool(T_y y, T_x x) { + if (op == Max) { + return (x > y) ? 
(T_y)x : y; + } else { + return (T_y)(x + y); + } +} + +/* + * A function used with Global Pooling + * For every filter, it updates the value by summing the current input (Average) or updating the maximum value (Max) + */ +template void compute_global_pool(const data_T &in_elem, res_T &data_input) { + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_filt; i++) { + data_input[i] = reduce_global_pool( + data_input[i], in_elem[i]); + } +} + +template void global_pooling1d_cl_stream() { + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + + using data_T = typename ExtractPipeType::value_type; + using res_T = typename ExtractPipeType::value_type; + + using accum_arr_t = array; + + [[intel::fpga_register]] accum_arr_t data_input; + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + data_input[i] = pad_val(); + } + + for (int i = 0; i < CONFIG_T::n_in; i++) { + compute_global_pool(data_pipe::read(), data_input); + } + + [[intel::fpga_register]] res_T res_pack; + if (CONFIG_T::pool_op == Average) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + res_pack[i] = static_cast(data_input[i] / CONFIG_T::n_in); + } + } else { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + res_pack[i] = static_cast(data_input[i]); + } + } + + res_pipe::write(res_pack); +} + +template void global_pooling2d_cl_stream() { + assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); + assert(CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0); + + using data_T = typename ExtractPipeType::value_type; + using res_T = typename ExtractPipeType::value_type; + + using accum_arr_t = array; + + [[intel::fpga_register]] accum_arr_t data_input; + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + data_input[i] = pad_val(); + } + + for (int i = 0; i < CONFIG_T::in_height; i++) { + for (int j = 0; j < CONFIG_T::in_width; j++) { + compute_global_pool(data_pipe::read(), data_input); + } + } + + [[intel::fpga_register]] res_T 
res_pack; + if (CONFIG_T::pool_op == Average) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + res_pack[i] = + static_cast(data_input[i] / (CONFIG_T::in_width * CONFIG_T::in_height)); + } + } else { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_filt; i++) { + res_pack[i] = static_cast(data_input[i]); + } + } + + res_pipe::write(res_pack); +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_printf.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_printf.h new file mode 100644 index 0000000000..5fec90d1aa --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_printf.h @@ -0,0 +1,18 @@ +#ifndef NNET_PRINTF_H_ +#define NNET_PRINTF_H_ + +#ifdef __SYCL_DEVICE_ONLY__ +#define CL_CONSTANT __attribute__((opencl_constant)) +#else +#define CL_CONSTANT +#endif + +using namespace sycl; + +#define PRINTF(format, ...) \ + { \ + static const CL_CONSTANT char _format[] = format; \ + ext::oneapi::experimental::printf(_format, ##__VA_ARGS__); \ + } + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent.h new file mode 100644 index 0000000000..e1a0903d4c --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent.h @@ -0,0 +1,566 @@ +#ifndef NNET_RECURRENT_H_ +#define NNET_RECURRENT_H_ + +#include "nnet_common.h" +#include "nnet_dense.h" +#include "nnet_recurrent_activation.h" + +namespace nnet { + +//---------------------- +// Utils +//---------------------- + +template +void multiply_W(const data_T &input, res_T &out, const weight_t &weight) { +MULTIPLY_W_LOOP_I: + #pragma unroll + for (int i = 0; i < N_OUT; i++) { + out[i] = 0; + + MULTIPLY_W_LOOP_J: + #pragma unroll + for (int j = 0; j < N_IN; j++) { + out[i] += input[j] * weight[i * N_IN + j]; + } + } +} + +template +void multiply_U(const data_T &input, res_T &out, const weight_t &weight) { 
+MULTIPLY_U_LOOP_I: + #pragma unroll + for (int i = 0; i < N_OUT; i++) { + out[i] = 0; + + MULTIPLY_U_LOOP_J: + #pragma unroll + for (int j = 0; j < N_OUT; j++) { + out[i] += input[j] * weight[i * N_OUT + j]; + } + } +} + +template +void add_bias(const data_T &inputs, res_T &out, const bias_t &bias) { +ADD_BIAS_LOOP: + #pragma unroll + for (int i = 0; i < N; i++) { + out[i] = inputs[i] + bias[i]; + } +} + +template +void multiply_vectors(const data1_T &in1, const data2_T &in2, res_T &out) { +MULTIPLY_VECT_LOOP: + #pragma unroll + for (int i = 0; i < N; i++) { + out[i] = in1[i] * in2[i]; + } +} + +template +void add_vectors(const data1_T &in1, const data2_T &in2, res_T &out) { +ADD_VECTOR_LOOP: + #pragma unroll + for (int i = 0; i < N; i++) { + out[i] = in1[i] + in2[i]; + } +} + +//---------------------- +// GRU +//---------------------- + +struct gru_config { + // Internal data type definitions + typedef float weight_t; + typedef float bias_t; + typedef float accum_t; + + // Layer Sizes + static const unsigned n_in = 1; + static const unsigned n_out = 1; + static const unsigned n_units = 1; + static const unsigned n_timesteps = 1; + static const unsigned n_outputs = 1; + static const bool return_sequences = false; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const bool store_weights_in_bram = false; + + // Activation + template using activation_recr = nnet::activation::relu; + + template using activation = nnet::activation::relu; +}; + +template +void gru_cell(const data_T &x, h_T &h, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::recurrent_weight_t &recurrent_weights, const typename CONFIG_T::bias_t &bias, + const typename CONFIG_T::recurrent_bias_t &recurrent_bias) { + static constexpr int recurrent_unroll_factor = CONFIG_T::n_units / CONFIG_T::reuse_factor; + // A matrix containing the values of matrix product between input (x) and weights (weights), for 
update, reset and + // candidate state gates, for each of the units + + using accum_array_T = array; + + [[intel::fpga_register]] accum_array_T mat_mul_x_w; + nnet::dense_resource(x, mat_mul_x_w, weights, bias); + + // A matrix containing the values of matrix product between previou state (h) and recurrent weights (recurrent_weights), + // for update, reset and candidate state gates, for each of the units + [[intel::fpga_register]] accum_array_T mat_mul_h_wr; + nnet::dense_resource(h, mat_mul_h_wr, recurrent_weights, + recurrent_bias); + + // A vector containing both the values of z(t) and r(t) for every state + using z_activ_array_T = array; + [[intel::fpga_register]] z_activ_array_T z_r; + + // Add the individual vectors from the multiplication of mat_mul_x_w = Wx*x(t) and mat_mul_h_wr = Wh*h(t-1) + // Unrolled fully, no DSPs used + #pragma unroll + for (int i = 0; i < (2 * CONFIG_T::n_units); i++) { + z_r[i] = mat_mul_x_w[i] + mat_mul_h_wr[i]; + } + + // Activation on z(t) and r(t) + [[intel::fpga_register]] z_activ_array_T z_r_act; + CONFIG_T::template activation_recr::activation(z_r, z_r_act); + + // A matrix containing the values of Hadamard product between r(t) = z_r_act[n_units:2*n_units] and h(t-1) = h + using h_activ_array_T = array; + [[intel::fpga_register]] h_activ_array_T hadamard_r_h; + #pragma unroll recurrent_unroll_factor + for (int i = 0; i < (CONFIG_T::n_units); i++) { + hadamard_r_h[i] = z_r_act[i + CONFIG_T::n_units] * mat_mul_h_wr[i + 2 * CONFIG_T::n_units]; + } + + // The candidate state; X * W_{hx} + hadmard(r(t), h_(t-1)) * W_{hh} + b_{h} + [[intel::fpga_register]] h_activ_array_T h_cand; + // Addition - can unroll fully; no DSPs used here + #pragma unroll + for (int i = 0; i < (CONFIG_T::n_units); i++) { + h_cand[i] = mat_mul_x_w[i + 2 * CONFIG_T::n_units] + hadamard_r_h[i]; + } + + // Activation on candidate state + [[intel::fpga_register]] h_activ_array_T h_cand_act; + CONFIG_T::template activation::activation(h_cand, + h_cand_act); + + 
// Update state + #pragma unroll recurrent_unroll_factor + for (int i = 0; i < (CONFIG_T::n_units); i++) { + h[i] = static_cast(h_cand_act[i] * (1 - z_r_act[i]) + h[i] * z_r_act[i]); + } +} + +template +void gru(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::recurrent_weight_t &recurrent_weights, const typename CONFIG_T::bias_t &bias, + const typename CONFIG_T::recurrent_bias_t &recurrent_bias) { + + using h_T = array; + [[intel::fpga_register]] data_T x; + [[intel::fpga_register]] h_T h; + + #pragma unroll + for (int i = 0; i < CONFIG_T::n_units; i++) { + h[i] = 0; + } + + // Loop depedency - cannot pipeline + [[intel::disable_loop_pipelining]] for (int t = 0; t < CONFIG_T::n_timesteps; t++) { + // Get data at current time step + #pragma unroll + for (int j = 0; j < CONFIG_T::n_in; j++) { + x[j] = data[j + t * CONFIG_T::n_in]; + } + + nnet::gru_cell(x, h, weights, recurrent_weights, bias, recurrent_bias); + + if (CONFIG_T::return_sequences) { + #pragma unroll + for (int i = 0; i < CONFIG_T::n_units; i++) { + res[CONFIG_T::n_units * t + i] = h[i]; + } + } + } + + if (!CONFIG_T::return_sequences) { + #pragma unroll + for (int i = 0; i < (CONFIG_T::n_units); i++) { + res[i] = h[i]; + } + } +} + +//---------------------- +// SimpleRNN +//---------------------- + +struct simpleRNN_config { + // Internal data type definitions + typedef float weight_t; + typedef float bias_t; + typedef float accum_t; + + // Layer Sizes + static const unsigned n_in = 1; + static const unsigned n_out = 1; + static const unsigned n_outputs = 1; + static const unsigned n_timesteps = 1; + static const bool return_sequences = false; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const bool store_weights_in_bram = false; + + // Activation + template using activation_recr = nnet::activation::relu; + + template using activation = nnet::activation::relu; +}; + 
+template +void simple_rnn_cell(const in_T &inputs, h_T &hidden_state, h_T &hidden_state_o, const typename CONFIG_T::weight_t &kernel, + const typename CONFIG_T::recurrent_weight_t &rec_kernel, const typename CONFIG_T::bias_t &bias) { + + using accum_array_T = array; + // Weight multiplication + [[intel::fpga_register]] accum_array_T afterW; + multiply_W(inputs, afterW, kernel); + + // Bias addition + [[intel::fpga_register]] accum_array_T afterBias; + add_bias(afterW, afterBias, bias); + + // Hidden state + [[intel::fpga_register]] accum_array_T hiddenCand; + multiply_U(hidden_state, hiddenCand, + rec_kernel); + + // Vector addition + [[intel::fpga_register]] accum_array_T afterAdd; + add_vectors(afterBias, hiddenCand, afterAdd); + + // Activation + CONFIG_T::template activation::activation(afterAdd, hidden_state_o); +} + +template +void simple_rnn(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &kernel, + const typename CONFIG_T::recurrent_weight_t &rec_kernel, const typename CONFIG_T::bias_t &bias) { + + using in_T = array; + using h_T = array; + + [[intel::fpga_register]] h_T hidden_state[CONFIG_T::n_timesteps + 1]; + [[intel::fpga_register]] h_T hidden_state_temp; + [[intel::fpga_register]] h_T h; + [[intel::fpga_register]] in_T in; + +// Set initially hidden state (output) to zero +INIT_LOOP: + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + hidden_state[0][x] = 0; + } + + [[intel::disable_loop_pipelining]] for (int i = 0; i < CONFIG_T::n_timesteps; i++) { + + // Data at current time step + #pragma unroll + for (int x = 0; x < CONFIG_T::n_in; x++) { + in[x] = data[x + i * CONFIG_T::n_in]; + } + + // Hidden state at current time step + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + hidden_state_temp[x] = hidden_state[i][x]; + } + + // Do SimpleRNN + simple_rnn_cell(in, hidden_state_temp, h, kernel, rec_kernel, bias); + + // Write result + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + 
hidden_state[i + 1][x] = h[x]; + } + } + + if (CONFIG_T::return_sequences == 0) { + // Output when return_sequences is false + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + res[x] = hidden_state[CONFIG_T::n_timesteps][x]; + } + } else { + // Output when return_sequences is true + #pragma unroll + for (int x = 0; x < CONFIG_T::n_timesteps; x++) { + #pragma unroll + for (int h = 0; h < CONFIG_T::n_out; h++) { + res[x * CONFIG_T::n_out + h] = hidden_state[x + 1][h]; + } + } + } +} + +//---------------------- +// LSTM +//---------------------- + +struct lstm_config { + // Internal data type definitions + typedef float weight_t; + typedef float bias_t; + typedef float accum_t; + + // Layer Sizes + static const unsigned n_in = 1; + static const unsigned n_out = 1; + static const unsigned n_outputs = 1; + + static const unsigned n_timesteps = 1; + static const bool return_sequences = false; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const bool store_weights_in_bram = false; + + // Activation + template using activation_recr = nnet::activation::relu; + + template using activation = nnet::activation::relu; +}; + +template +void lstm_cell(const in_T &inputs, h_T &hidden_state, h_T &hidden_state_o, h_T &cell_state, h_T &cell_state_o, + const typename CONFIG_T::weight_i_t &WI, const typename CONFIG_T::weight_f_t &WF, + const typename CONFIG_T::weight_c_t &WC, const typename CONFIG_T::weight_o_t &WO, + const typename CONFIG_T::recurrent_weight_i_t &RWI, const typename CONFIG_T::recurrent_weight_f_t &RWF, + const typename CONFIG_T::recurrent_weight_c_t &RWC, const typename CONFIG_T::recurrent_weight_o_t &RWO, + const typename CONFIG_T::bias_i_t &BI, const typename CONFIG_T::bias_f_t BF, + const typename CONFIG_T::bias_c_t &BC, const typename CONFIG_T::bias_o_t BO) { + + using accum_array_T = array; + + // Internals definitions + [[intel::fpga_register]] accum_array_T i_afterW; + 
[[intel::fpga_register]] accum_array_T i_afterBias; + [[intel::fpga_register]] accum_array_T c_afterW; + [[intel::fpga_register]] accum_array_T c_afterBias; + [[intel::fpga_register]] accum_array_T o_afterW; + [[intel::fpga_register]] accum_array_T o_afterBias; + [[intel::fpga_register]] accum_array_T f_afterW; + [[intel::fpga_register]] accum_array_T f_afterBias; + + // Hidden state Gate candidates, intermediate variables + [[intel::fpga_register]] accum_array_T i_hiddenCand; + [[intel::fpga_register]] accum_array_T f_hiddenCand; + [[intel::fpga_register]] accum_array_T c_hiddenCand; + [[intel::fpga_register]] accum_array_T o_hiddenCand; + + // After addition, intermediate variables + [[intel::fpga_register]] accum_array_T i_afterAdd; + [[intel::fpga_register]] accum_array_T f_afterAdd; + [[intel::fpga_register]] accum_array_T c_afterAdd; + [[intel::fpga_register]] accum_array_T o_afterAdd; + + // Gate outputs + [[intel::fpga_register]] accum_array_T gate_i; + [[intel::fpga_register]] accum_array_T gate_f; + [[intel::fpga_register]] accum_array_T gate_c; + [[intel::fpga_register]] accum_array_T gate_o; + [[intel::fpga_register]] accum_array_T gate_ic; + [[intel::fpga_register]] accum_array_T gate_forget; + [[intel::fpga_register]] accum_array_T h; + + // Intermediate variable cell calculation + [[intel::fpga_register]] accum_array_T cell_act_multp; + [[intel::fpga_register]] accum_array_T cell_act_add; + + //-----------Gate I Calculations + // Weight multiplication + multiply_W(inputs, i_afterW, WI); + + // Bias addition + add_bias(i_afterW, i_afterBias, BI); + + // Hidden Candidate + multiply_U(hidden_state, i_hiddenCand, + RWI); + + // Vector addition + add_vectors(i_afterBias, i_hiddenCand, i_afterAdd); + + // Activation + CONFIG_T::template activation_recr::activation( + i_afterAdd, gate_i); + + //-----------Gate F Calculations + // Weight multiplication + multiply_W(inputs, f_afterW, WF); + + // Bias addition + add_bias(f_afterW, f_afterBias, BF); + + // 
Hidden Candidate + multiply_U(hidden_state, f_hiddenCand, + RWF); + + // Vector addition + add_vectors(f_afterBias, f_hiddenCand, f_afterAdd); + + // Activation + CONFIG_T::template activation_recr::activation( + f_afterAdd, gate_f); + + //-----------Gate C Calculations + // Weight multiplication + multiply_W(inputs, c_afterW, WC); + + // Bias addition + add_bias(c_afterW, c_afterBias, BC); + + // Hidden Candidate + multiply_U(hidden_state, c_hiddenCand, + RWC); + + // Vector addition + add_vectors(c_afterBias, c_hiddenCand, c_afterAdd); + + // Activation + CONFIG_T::template activation::activation( + c_afterAdd, gate_c); + + //-----------gate I and C multiply + // Vector multiplication + multiply_vectors(gate_i, gate_c, gate_ic); + + //-----------Gate O Calculations + // Weight multiplication + multiply_W(inputs, o_afterW, WO); + + // Bias addition + add_bias(o_afterW, o_afterBias, BO); + + // Hidden Candidate + multiply_U(hidden_state, o_hiddenCand, + RWO); + + // Vector addition + add_vectors(o_afterBias, o_hiddenCand, o_afterAdd); + + // Activation + CONFIG_T::template activation_recr::activation( + o_afterAdd, gate_o); + + //-----------Cell State Calculation + // Vector multiplication + multiply_vectors(gate_f, cell_state, cell_act_multp); + + // Vector addition + add_vectors(gate_ic, cell_act_multp, cell_act_add); + + //-----------Forget gate Calculation + // Activation + CONFIG_T::template activation::activation( + cell_act_add, gate_forget); + + // Vector multiplication + multiply_vectors(gate_o, gate_forget, h); + +OUTPUT_WRITE_LOOP: + #pragma unroll + for (int x = (CONFIG_T::n_out - 1); x >= 0; x--) { + hidden_state_o[x] = h[x]; + cell_state_o[x] = cell_act_add[x]; + } +} + +template +void lstm(const data_T &data, res_T &res, const typename CONFIG_T::weight_i_t &WI, const typename CONFIG_T::weight_f_t &WF, + const typename CONFIG_T::weight_c_t &WC, const typename CONFIG_T::weight_o_t &WO, + const typename CONFIG_T::recurrent_weight_i_t &RWI, const 
typename CONFIG_T::recurrent_weight_f_t &RWF, + const typename CONFIG_T::recurrent_weight_c_t &RWC, const typename CONFIG_T::recurrent_weight_o_t &RWO, + const typename CONFIG_T::bias_i_t &BI, const typename CONFIG_T::bias_f_t &BF, + const typename CONFIG_T::bias_c_t &BC, const typename CONFIG_T::bias_o_t &BO) { + + // Note: currently this does not support recurrent bias + + using in_T = array; + using h_T = array; + + [[intel::fpga_register]] h_T hidden_state[CONFIG_T::n_timesteps + 1]; + [[intel::fpga_register]] h_T hidden_state_temp; + [[intel::fpga_register]] h_T cell_state[CONFIG_T::n_timesteps + 1]; + [[intel::fpga_register]] h_T cell_state_temp; + [[intel::fpga_register]] h_T h; + [[intel::fpga_register]] h_T c; + [[intel::fpga_register]] in_T in; + +// Set initially hidden state (output) to zero +INIT_LOOP: + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + hidden_state[0][x] = 0; + cell_state[0][x] = 0; + } + + // Input dimension + [[intel::disable_loop_pipelining]] for (int i = 0; i < CONFIG_T::n_timesteps; i++) { + // Data at current time step + for (int x = 0; x < CONFIG_T::n_in; x++) { + in[x] = data[x + i * CONFIG_T::n_in]; + } + + // Hidden state at current time step + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + hidden_state_temp[x] = hidden_state[i][x]; + cell_state_temp[x] = cell_state[i][x]; + } + + // Do LSTM + lstm_cell(in, hidden_state_temp, h, cell_state_temp, c, WI, WF, WC, WO, RWI, RWF, RWC, RWO, BI, + BF, BC, BO); + + // Write result + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + hidden_state[i + 1][x] = h[x]; + cell_state[i + 1][x] = c[x]; + } + } + + if (CONFIG_T::return_sequences == 0) { + // Output when return_sequences is false + #pragma unroll + for (int x = 0; x < CONFIG_T::n_out; x++) { + res[x] = hidden_state[CONFIG_T::n_timesteps][x]; + } + } else { + // Output when return_sequences is true + #pragma unroll + for (int x = 0; x < CONFIG_T::n_timesteps; x++) { + for (int h = 0; h < 
CONFIG_T::n_out; h++) { + res[x * CONFIG_T::n_out + h] = hidden_state[x + 1][h]; + } + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_activation.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_activation.h new file mode 100644 index 0000000000..893fd027c1 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_activation.h @@ -0,0 +1,47 @@ +#ifndef NNET_RECR_ACTIVATION_H_ +#define NNET_RECR_ACTIVATION_H_ + +#include "nnet_activation.h" +#include "nnet_common.h" + +namespace nnet { + +namespace activation { + +template class Activation { + public: + // ************************************************* + // Blank Activation + // ************************************************* + static void activation(const data_T &data, res_T &res) {} +}; + +template class relu : public Activation { + public: + // ************************************************* + // Relu Activation + // ************************************************* + static void activation(const data_T &data, res_T &res) { nnet::relu(data, res); } +}; + +template class sigmoid : public Activation { + public: + // ************************************************* + // Sigmoid Activation + // ************************************************* + static void activation(const data_T &data, res_T &res) { nnet::sigmoid(data, res); } +}; + +template class tanh : public Activation { + public: + // ************************************************* + // TanH Activation + // ************************************************* + static void activation(const data_T &data, res_T &res) { nnet::dense_tanh(data, res); } +}; + +} // namespace activation + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_stream.h new file mode 100644 index 0000000000..7429419cda --- 
/dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_recurrent_stream.h @@ -0,0 +1,68 @@ +#ifndef NNET_RECURRENT_STREAM_H_ +#define NNET_RECURRENT_STREAM_H_ + +#include "nnet_common.h" +#include "nnet_dense.h" +#include "nnet_recurrent_activation.h" + +namespace nnet { +template +void gru_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::recurrent_weight_t recurrent_weights, + typename CONFIG_T::bias_t bias, typename CONFIG_T::recurrent_bias_t recurrent_bias) { + + using data_T = typename ExtractPipeType::value_type; + using res_T = typename ExtractPipeType::value_type; + using h_T = array; + + constexpr auto datasize = std::tuple_size{}; + constexpr auto ressize = std::tuple_size{}; + + [[intel::fpga_register]] h_T h; + #pragma unroll + for (int i = 0; i < CONFIG_T::n_units; i++) { + h[i] = 0; + } + + [[intel::fpga_register]] data_T x; + +DataPropagation: + for (int i_in = 0; i_in < CONFIG_T::n_timesteps * CONFIG_T::n_in / datasize; i_in++) { + auto data_pack = data_pipe::read(); + + DataPack: + #pragma unroll + for (int i_pack = 0; i_pack < datasize; i_pack++) { + x[i_pack] = data_pack[i_pack]; + } + + nnet::gru_cell(x, h, weights, recurrent_weights, bias, recurrent_bias); + + if (CONFIG_T::return_sequences) { + res_T res_pack; + + ResPackRetSeq: + #pragma unroll + for (int i_pack = 0; i_pack < ressize; i_pack++) { + res_pack[i_pack] = h[i_pack]; + } + + res_pipe::write(res_pack); + } + } + + if (!CONFIG_T::return_sequences) { + res_T res_pack; + + ResPackNoRetSeq: + #pragma unroll + for (int i_pack = 0; i_pack < ressize; i_pack++) { + res_pack[i_pack] = h[i_pack]; + } + + res_pipe::write(res_pack); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize.h new file mode 100644 index 0000000000..c461e337da --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize.h @@ -0,0 +1,36 
@@ +#ifndef NNET_IMAGE_H_ +#define NNET_IMAGE_H_ + +namespace nnet { +/* Default geometry for the resize layer. NOTE(review): resize_nearest reads CONFIG_T::height, width, new_height and new_width, none of which is declared here (only in_height, in_width, out_height, out_width, n_chan) - confirm that concrete configs provide those names. */ +struct resize_config { + static const unsigned in_height = 10; + static const unsigned in_width = 10; + + static const unsigned out_height = 10; + static const unsigned out_width = 10; + + static const unsigned n_chan = 10; +}; +/* resize_nearest: nearest-neighbour resize of a flat image buffer indexed as (row * width + column) * n_chan + channel. The per-axis ratios are scaled by 2^16: (size << 16) / new_size + 1, and the source coordinate is (output coordinate * ratio) >> 16. All n_chan values of the selected source pixel are copied into the output pixel. */ +template void resize_nearest(const data_T &image, res_T &resized) { + int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; + int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; +/* Visit every output pixel (row i, column j) and select its source pixel (y, x). */ + for (int i = 0; i < CONFIG_T::new_height; i++) { + for (int j = 0; j < CONFIG_T::new_width; j++) { + int x = ((j * x_ratio) >> 16); + int y = ((i * y_ratio) >> 16); + + #pragma unroll + for (int k = 0; k < CONFIG_T::n_chan; k++) { + resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = + image[(y * CONFIG_T::width * CONFIG_T::n_chan) + x * CONFIG_T::n_chan + k]; + } + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize_stream.h new file mode 100644 index 0000000000..9a37f098e4 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_resize_stream.h @@ -0,0 +1,58 @@ +#ifndef NNET_IMAGE_STREAM_H_ +#define NNET_IMAGE_STREAM_H_ + +#include "nnet_common.h" + +namespace nnet { +/* resize_nearest_stream: streaming upscaler. Reads one input row (CONFIG_T::width packs) from data_pipe, then writes that row ratio_height times to res_pipe, repeating every pixel ratio_width times. The asserts require each output dimension to be an integer multiple of the matching input dimension. */ +template void resize_nearest_stream() { + assert(CONFIG_T::new_height % CONFIG_T::height == 0); + assert(CONFIG_T::new_width % CONFIG_T::width == 0); + + using data_T = typename ExtractPipeType::value_type; +/* Integer upscaling factor per axis. */ + constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; + constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; +/* For each input row: buffer it in data_in_row, then replay it to the output. */ +ImageHeight: + for (unsigned h = 0; h < CONFIG_T::height; h++) { + [[intel::fpga_register]] data_T data_in_row[CONFIG_T::width]; + + ImageWidth: + for (unsigned i = 0; i < CONFIG_T::width; i++) { + [[intel::fpga_register]] auto in_data =
data_pipe::read(); + + ImageChan: + #pragma unroll + for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { + data_in_row[i][j] = in_data[j]; + } + } +/* Replay the buffered row: ratio_height output rows, each pixel written ratio_width times. */ + ResizeHeight: + for (unsigned i = 0; i < ratio_height; i++) { + + ImageWidth2: + for (unsigned l = 0; l < CONFIG_T::width; l++) { + + ResizeWidth: + for (unsigned j = 0; j < ratio_width; j++) { + + [[intel::fpga_register]] data_T out_data; + + ResizeChan: + #pragma unroll + for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { + out_data[k] = data_in_row[l][k]; + } + + res_pipe::write(out_data); + } + } + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_stream.h new file mode 100644 index 0000000000..6e5e86a581 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_stream.h @@ -0,0 +1,126 @@ +#ifndef NNET_CLONE_H +#define NNET_CLONE_H + +#include "nnet_common.h" + +namespace nnet { +/* Default broadcast parameters: 10 x 10 input, 1 channel, 2 duplicates. */ +struct broadcast_config { + static const unsigned in_height = 10; + static const unsigned in_width = 10; + static const unsigned n_chan = 1; + static const unsigned n_dupl = 2; +}; +/* clone_stream, two outputs: reads N / datasize packs from data_pipe and writes an element-wise copy of each pack to res1_pipe and to res2_pipe. */ +template void clone_stream() { + using data_T = typename ExtractPipeType::value_type; + using res1_T = typename ExtractPipeType::value_type; + using res2_T = typename ExtractPipeType::value_type; + constexpr auto datasize = std::tuple_size{}; +CloneLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) { + data_T in_data = data_pipe::read(); + res1_T out_data1; + res2_T out_data2; + + ClonePack: + #pragma unroll + for (int j = 0; j < datasize; j++) { + out_data1[j] = in_data[j]; + out_data2[j] = in_data[j]; + } + + res1_pipe::write(out_data1); + res2_pipe::write(out_data2); + } +} +/* clone_stream, three outputs: reads N / datasize packs from data_pipe and writes an element-wise copy of each pack to res1_pipe, res2_pipe and res3_pipe. */ +template void clone_stream() { + using data_T = typename ExtractPipeType::value_type; + using res1_T = typename ExtractPipeType::value_type; + using res2_T = typename ExtractPipeType::value_type; + using
res3_T = typename ExtractPipeType::value_type; + constexpr auto datasize = std::tuple_size{}; +CloneLoop: + [[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) { + data_T in_data = data_pipe::read(); + res1_T out_data1; + res2_T out_data2; + res3_T out_data3; + + ClonePack: + #pragma unroll + for (int j = 0; j < datasize; j++) { + out_data1[j] = in_data[j]; + out_data2[j] = in_data[j]; + out_data3[j] = in_data[j]; + } + + res1_pipe::write(out_data1); + res2_pipe::write(out_data2); + res3_pipe::write(out_data3); + } +} +/* repack_stream: converts a stream of N elements from packs of datasize elements to packs of ressize elements. Three compile-time cases: equal sizes (plain copy), datasize greater than ressize (one input pack is split into datasize / ressize output packs), datasize smaller than ressize (ressize / datasize input packs are merged into one output pack). NOTE(review): the split and merge cases rely on integer division and no divisibility check is visible here - confirm callers guarantee it. */ +template void repack_stream() { + using data_T = typename ExtractPipeType::value_type; + using res_T = typename ExtractPipeType::value_type; + constexpr auto datasize = std::tuple_size{}; + constexpr auto ressize = std::tuple_size{}; +/* Case 1: equal pack sizes - element-wise copy. */ + if constexpr (datasize == ressize) { + [[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) { + + [[intel::fpga_memory]] auto in_data = data_pipe::read(); + [[intel::fpga_memory]] res_T out_data; + + #pragma unroll + for (int j = 0; j < datasize; j++) { + out_data[j] = in_data[j]; + } + + res_pipe::write(out_data); + } + } else if constexpr (datasize > ressize) { + constexpr unsigned pack_diff = datasize / ressize; +/* Case 2: every input pack is split into pack_diff consecutive output packs. */ + for (int i = 0; i < N / datasize; i++) { + + [[intel::fpga_memory]] auto in_data = data_pipe::read(); + [[intel::fpga_memory]] res_T out_data; + + [[intel::initiation_interval(1)]] for (int j = 0; j < pack_diff; j++) { + + #pragma unroll + for (int k = 0; k < ressize; k++) { + out_data[k] = in_data[j * ressize + k]; + } + res_pipe::write(out_data); + } + } + } else { // datasize < ressize + [[intel::fpga_memory]] res_T out_data; + constexpr unsigned pack_diff = ressize / datasize; + unsigned pack_cnt = 0; + [[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) { + + [[intel::fpga_memory]] auto in_data = data_pipe::read(); +/* Case 3: store this input pack in slot pack_cnt of the wider output pack; the write happens once pack_diff input packs were collected. */ + #pragma unroll + for (int j = 0; j < datasize; j++) { + out_data[pack_cnt * datasize + j] = in_data[j]; + } + + if
(pack_cnt == pack_diff - 1) { + res_pipe::write(out_data); + pack_cnt = 0; + } else { + pack_cnt++; + } + } + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose.h new file mode 100644 index 0000000000..4c6e28c242 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose.h @@ -0,0 +1,37 @@ +#ifndef NNET_TRANSPOSE_H_ +#define NNET_TRANSPOSE_H_ + +namespace nnet { +/* Base configuration for transpose: dims and N default to 0; the shape, permutation and stride arrays listed in the comments below are expected from the derived config. */ +struct transpose_config { + static constexpr unsigned dims = 0; + static constexpr unsigned N = 0; + + // Inherited struct should define these + // static constexpr std::array from_shape; + // static constexpr std::array to_shape; + // static constexpr std::array perm; + // static constexpr std::array perm_strides; +}; +/* transfer_idx: maps a flat output index to the flat input index. Walks the dimensions from last to first, extracts the output coordinate with index % to_shape[i], and accumulates coordinate * perm_strides[i]. The int accumulator is returned as unsigned. */ +template unsigned transfer_idx(int index) { + // Given output idx in c-order flat array, return input idx + int idx = 0; + for (int i = CONFIG_T::dims - 1; i >= 0; i--) { + idx += (index % CONFIG_T::to_shape[i]) * CONFIG_T::perm_strides[i]; + index /= CONFIG_T::to_shape[i]; + } + return idx; +} +/* transpose: fills res so that res[i] = data[transfer_idx(i)] for all CONFIG_T::N elements. */ +template void transpose(const data_T &data, res_T &res) { + #pragma unroll + for (int i = 0; i < CONFIG_T::N; i++) { + int idx = transfer_idx(i); + res[i] = data[idx]; + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose_stream.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose_stream.h new file mode 100644 index 0000000000..6a7c19ce58 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_transpose_stream.h @@ -0,0 +1,39 @@ +#ifndef NNET_TRANSPOSE_STREAM_H_ +#define NNET_TRANSPOSE_STREAM_H_ + +namespace nnet { +/* transpose_stream: streaming transpose. Buffers all CONFIG_T::N input elements read from data_pipe, then writes them to res_pipe in permuted order, res_size elements per pack. NOTE(review): transfer_idx is called but this header shows no include that declares it - confirm the include order used by the project. */ +template void transpose_stream() { + + using data_T = typename ExtractPipeType::value_type; + using res_T = typename ExtractPipeType::value_type; + + constexpr auto data_size = std::tuple_size::value_type>{}; +
constexpr auto res_size = std::tuple_size::value_type>{}; + + [[intel::fpga_register]] typename data_T::value_type data_array[CONFIG_T::N]; +/* Phase 1: drain the whole input into data_array, data_size elements per read. */ + for (int i = 0; i < CONFIG_T::N / data_size; i++) { + [[intel::fpga_register]] data_T in_data = data_pipe::read(); + + #pragma unroll + for (int j = 0; j < data_size; j++) { + data_array[i * data_size + j] = typename data_T::value_type(in_data[j]); + } + } +/* Phase 2: gather through transfer_idx and write one output pack per iteration. */ + for (int i = 0; i < CONFIG_T::N / res_size; i++) { + [[intel::fpga_register]] res_T out_data; + + #pragma unroll + for (int j = 0; j < res_size; j++) { + out_data[j] = typename res_T::value_type(data_array[transfer_idx(i * res_size + j)]); + } + + res_pipe::write(out_data); + } +} + +} // namespace nnet + +#endif diff --git a/experiments/dense_streaming/src/firmware/nnet_utils/nnet_types.h b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_types.h new file mode 100644 index 0000000000..8cf883c1d5 --- /dev/null +++ b/experiments/dense_streaming/src/firmware/nnet_utils/nnet_types.h @@ -0,0 +1,71 @@ +#ifndef NNET_TYPES_H_ +#define NNET_TYPES_H_ + +#include +#include +#include +#include +#include +#include +/* NOTE(review): the include targets above and the template parameter lists below are missing in this listing - restore them from the original header before relying on this text. */ +namespace nnet { + +// Define the pipe type that we use +template using array = std::array; + +// T should be an array +template constexpr T zero_array() { + T ar; + #pragma unroll + for (auto &a : ar) { + a = 0; + } + return ar; +} + +// This is a helper to extract the value_type of a pipe +template struct ExtractPipeType { typedef T value_type; }; + +template