tensorflow · AdrianLundell · Sep 19, 2024 · Nov 8, 2024 · Nov 13, 2024 · Nov 14, 2024
@@ -287,6 +287,7 @@ tflm_kernel_cc_library(
         "svdf_common.cc",
         "tanh.cc",
         "transpose.cc",
+        "transpose_common.cc",
         "transpose_conv.cc",
         "unidirectional_sequence_lstm.cc",
         "unpack.cc",
@@ -322,6 +323,7 @@ tflm_kernel_cc_library(
         "strided_slice.h",
         "sub.h",
         "svdf.h",
+        "transpose.h",
         "transpose_conv.h",
     ] + select({
         xtensa_fusion_f1_config(): glob(["xtensa/**/*.h"]),

@@ -0,0 +1,252 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+
+#include <limits>
+
+#include "Include/arm_nn_types.h"
+#include "Include/arm_nnfunctions.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/pad.h"
+#include "tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+// Per-node state for Pad: filled in by PadPrepare and read by the eval
+// functions through node->user_data.
+struct OpData {
+  // Per-dimension left/right padding amounts and the resizing category.
+  PadParams params;
+  // Zero point of the output tensor. Only written by PadPrepare for int8
+  // inputs; used as the pad value when no constant_values input is supplied.
+  int32_t output_zero_point;
+};
+
+// Reserves persistent storage for one OpData instance and hands it back so
+// that it becomes the node's user_data. `buffer` and `length` are not used.
+void* PadInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* op_data_storage =
+      context->AllocatePersistentBuffer(context, sizeof(OpData));
+  return op_data_storage;
+}
+
+// Int8 Pad evaluation backed by the CMSIS-NN arm_pad_s8 kernel.
+//
+// The pad value is the scalar held by the optional third input when it is
+// present, otherwise the output zero point recorded by PadPrepare.
+//
+// arm_pad_s8 is handed four-element dimension structs, while PadPrepare only
+// fills one padding entry per input dimension. Inputs of lower rank are
+// therefore extended to 4D: missing leading dimensions get size 1 and zero
+// padding.
+//
+// Returns kTfLiteError when the input has more than four dimensions or when
+// arm_pad_s8 does not report ARM_CMSIS_NN_SUCCESS; kTfLiteOk otherwise.
+TfLiteStatus PadEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3
+          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+
+  // PadPrepare verifies that the zero point fits in int8 when no explicit
+  // constant is given, so the narrowing cast keeps the value.
+  const int8_t pad_value =
+      constant_values == nullptr
+          ? static_cast<int8_t>(data->output_zero_point)
+          : *tflite::micro::GetTensorData<int8_t>(constant_values);
+  const int8_t* input_ptr = tflite::micro::GetTensorData<int8_t>(input);
+  int8_t* output_ptr = tflite::micro::GetTensorData<int8_t>(output);
+
+  // Number of dimensions carried by a cmsis_nn_dims struct.
+  constexpr int kCmsisNnDimCount = 4;
+  const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  const int rank = input_shape.DimensionsCount();
+  TF_LITE_ENSURE(context, rank <= kCmsisNnDimCount);
+  const RuntimeShape d =
+      RuntimeShape::ExtendedShape(kCmsisNnDimCount, input_shape);
+  const cmsis_nn_dims input_size = {d.Dims(0), d.Dims(1), d.Dims(2), d.Dims(3)};
+
+  // Right-align the stored paddings so that they line up with the extended
+  // shape; the added leading dimensions are not padded.
+  const PadParams& p = data->params;
+  int32_t left[kCmsisNnDimCount] = {0, 0, 0, 0};
+  int32_t right[kCmsisNnDimCount] = {0, 0, 0, 0};
+  const int first_dim = kCmsisNnDimCount - rank;
+  for (int i = 0; i < rank; ++i) {
+    left[first_dim + i] = p.left_padding[i];
+    right[first_dim + i] = p.right_padding[i];
+  }
+  const cmsis_nn_dims pre_pad = {left[0], left[1], left[2], left[3]};
+  const cmsis_nn_dims post_pad = {right[0], right[1], right[2], right[3]};
+
+  TF_LITE_ENSURE_EQ(context,
+                    arm_pad_s8(input_ptr, output_ptr, pad_value, &input_size,
+                               &pre_pad, &post_pad),
+                    ARM_CMSIS_NN_SUCCESS);
+
+  return kTfLiteOk;
+}
+
+// Generic Pad evaluation: dispatches on the input tensor type.
+//
+// float32, int16 and int32 use the reference kernels (float32 additionally
+// picks the image-style variant when PadPrepare selected it); int8 is
+// forwarded to PadEvalInt8 and its status is returned to the caller. When no
+// constant_values input is present the non-int8 types pad with zero.
+//
+// Returns kTfLiteError for any other input type.
+TfLiteStatus PadEval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3
+          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      float pad_value =
+          constant_values == nullptr
+              ? 0.f
+              : *tflite::micro::GetTensorData<float>(constant_values);
+      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
+        reference_ops::PadImageStyle(
+            data->params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<float>(input), &pad_value,
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      } else {
+        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                           tflite::micro::GetTensorData<float>(input),
+                           &pad_value, tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<float>(output));
+      }
+    } break;
+    case kTfLiteInt8:
+      // Propagate the optimized kernel's status instead of discarding it.
+      return PadEvalInt8(context, node);
+    case kTfLiteInt16: {
+      int16_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int16_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int16_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int16_t>(output));
+    } break;
+    case kTfLiteInt32: {
+      int32_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int32_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int32_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int32_t>(output));
+    } break;
+    default:
+      MicroPrintf("Type %s not currently supported by Pad.",
+                  TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+// Validates a Pad/PadV2 node and fills in its OpData.
+//
+// Checks performed:
+//   - 2 or 3 inputs, exactly 1 output, and matching input/output types;
+//   - input and output have the same rank, which does not exceed
+//     reference_ops::PadKernelMaxDimensionCount();
+//   - the optional constant_values input is a scalar of the input type;
+//   - paddings is a constant tensor holding one (before, after) pair per
+//     output dimension, and every output dimension equals the input dimension
+//     plus its two paddings;
+//   - for int8, either the output zero point fits in int8 (no
+//     constant_values) or constant_values shares the output's zero point and
+//     scale.
+//
+// On success the paddings, the resizing category and (for int8) the output
+// zero point are stored in the node's OpData and kTfLiteOk is returned. Any
+// failed check returns early through the TF_LITE_ENSURE* macros.
+TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, /*index=*/0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* paddings =
+      micro_context->AllocateTempInputTensor(node, /*index=*/1);
+  TF_LITE_ENSURE(context, paddings != nullptr);
+  TfLiteTensor* constant_values =
+      NumInputs(node) == 3
+          ? micro_context->AllocateTempInputTensor(node, /*index=*/2)
+          : nullptr;
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+
+  // The size check below indexes the input dimensions with output dimension
+  // indices, so both tensors must have the same rank.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), NumDimensions(output));
+
+  // Current implementations rely on the input rank not exceeding the maximum
+  // supported by the pad kernels.
+  TF_LITE_ENSURE(context, NumDimensions(input) <=
+                              reference_ops::PadKernelMaxDimensionCount());
+
+  if (constant_values != nullptr) {
+    TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
+    // Ensure that constant_values is a scalar.
+    TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
+  }
+
+  // There must be a pair of paddings for each output dimension.
+  TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
+                    output->dims->size * 2);
+  // On Micro, outputs must be properly sized by the converter.
+  // NOTE: This data is only available because the paddings buffer is stored in
+  // the flatbuffer:
+  TF_LITE_ENSURE(context, IsConstantTensor(paddings));
+  const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
+  for (int i = 0; i < output->dims->size; i++) {
+    int output_dim = output->dims->data[i];
+    int expected_dim =
+        input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
+    TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
+  }
+
+  // Calculate OpData:
+  // A 4D tensor whose first and last dimensions are left unpadded is handled
+  // by the image-style kernel; everything else uses the generic one.
+  data->params.resizing_category = ResizingCategory::kGenericResize;
+  const int paddings_total = GetTensorShape(paddings).FlatSize();
+  if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
+      (paddings_data[6] == 0 && paddings_data[7] == 0)) {
+    data->params.resizing_category = ResizingCategory::kImageStyle;
+  }
+
+  const int num_input_dimensions = NumDimensions(input);
+  data->params.left_padding_count = num_input_dimensions;
+  data->params.right_padding_count = num_input_dimensions;
+
+  // paddings is laid out as (before, after) pairs, one pair per dimension.
+  for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
+    data->params.left_padding[idx] = paddings_data[idx * 2];
+    data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
+  }
+
+  if (input->type == kTfLiteInt8) {
+    if (constant_values == nullptr) {
+      // Quantized Pad requires that 0 is represented in the quantized
+      // range.
+      TF_LITE_ENSURE(context, output->params.zero_point >=
+                                  std::numeric_limits<int8_t>::min());
+      TF_LITE_ENSURE(context, output->params.zero_point <=
+                                  std::numeric_limits<int8_t>::max());
+    } else {
+      // Quantized Pad requires that 'constant_values' is represented in the
+      // same quantized range as the input and output tensors.
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                        constant_values->params.zero_point);
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
+                        static_cast<double>(constant_values->params.scale));
+    }
+    data->output_zero_point = output->params.zero_point;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(paddings);
+  if (constant_values != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(constant_values);
+  }
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+// Builds the registration for PAD: generic, type-dispatching evaluation.
+TFLMRegistration Register_PAD() {
+  TFLMRegistration registration =
+      tflite::micro::RegisterOp(PadInit, PadPrepare, PadEval);
+  return registration;
+}
+// Builds the registration for PADV2; it shares init, prepare and eval with
+// PAD.
+TFLMRegistration Register_PADV2() {
+  TFLMRegistration registration =
+      tflite::micro::RegisterOp(PadInit, PadPrepare, PadEval);
+  return registration;
+}
+// Builds an int8-only PAD registration that evaluates with PadEvalInt8
+// directly, bypassing the type dispatch in PadEval.
+TFLMRegistration Register_PAD_INT8() {
+  TFLMRegistration registration =
+      tflite::micro::RegisterOp(PadInit, PadPrepare, PadEvalInt8);
+  return registration;
+}
+
+}  // namespace tflite
@@ -0,0 +1,109 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+
+#include "Include/arm_nnfunctions.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/transpose.h"
+#include "tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+// Int8 Transpose evaluation backed by the CMSIS-NN arm_transpose_s8 kernel.
+//
+// The permutation is read from the perm input tensor and may cover at most
+// four dimensions. Returns kTfLiteError when the permutation is longer than
+// that or when arm_transpose_s8 does not report ARM_CMSIS_NN_SUCCESS;
+// kTfLiteOk otherwise.
+TfLiteStatus TransposeEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* perm_tensor =
+      tflite::micro::GetEvalInput(context, node, kTransposePermTensor);
+  const int size = perm_tensor->dims->data[0];
+  TF_LITE_ENSURE(context, size <= 4);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kTransposeInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kTransposeOutputTensor);
+  const cmsis_nn_transpose_params transpose_params = {
+      size, reinterpret_cast<const uint32_t*>(perm_tensor->data.i32)};
+
+  // Build each shape once. Dimensions beyond the tensor rank are filled with
+  // 1 rather than read from unset shape storage.
+  const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  const RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+  const auto dim_or_one = [](const RuntimeShape& shape,
+                             int index) -> int32_t {
+    return index < shape.DimensionsCount() ? shape.Dims(index) : 1;
+  };
+  const cmsis_nn_dims input_dims = {
+      dim_or_one(input_shape, 0), dim_or_one(input_shape, 1),
+      dim_or_one(input_shape, 2), dim_or_one(input_shape, 3)};
+  const cmsis_nn_dims output_dims = {
+      dim_or_one(output_shape, 0), dim_or_one(output_shape, 1),
+      dim_or_one(output_shape, 2), dim_or_one(output_shape, 3)};
+
+  // Report a kernel failure as an error status instead of only asserting on
+  // it.
+  TF_LITE_ENSURE_EQ(
+      context,
+      arm_transpose_s8(tflite::micro::GetTensorData<int8_t>(input),
+                       tflite::micro::GetTensorData<int8_t>(output),
+                       &input_dims, &output_dims, &transpose_params),
+      ARM_CMSIS_NN_SUCCESS);
+
+  return kTfLiteOk;
+}
+
+// Generic Transpose evaluation: dispatches on the input tensor type.
+//
+// float32 uses the reference kernel with the permutation copied from the perm
+// input tensor; int8 is forwarded to TransposeEvalInt8 and its status is
+// returned to the caller. Any other type logs a message and returns
+// kTfLiteError.
+TfLiteStatus TransposeEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* perm_tensor =
+      tflite::micro::GetEvalInput(context, node, kTransposePermTensor);
+  const int32_t* perm_data = perm_tensor->data.i32;
+  const int size = perm_tensor->dims->data[0];
+  TransposeParams params;
+  params.perm_count = size;
+  for (int i = 0; i < size; ++i) {
+    params.perm[i] = perm_data[i];
+  }
+
+  // Transpose kernel only does rearranging values not numeric evaluations
+  // on each cell. It's safe to implement per size of scalar type and this
+  // trick keeps the total code size in a reasonable range.
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kTransposeInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kTransposeOutputTensor);
+  switch (input->type) {
+    case kTfLiteFloat32:
+      reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
+                               tflite::micro::GetTensorData<float>(input),
+                               tflite::micro::GetTensorShape(output),
+                               tflite::micro::GetTensorData<float>(output));
+      break;
+    case kTfLiteInt8:
+      // The int8 path can fail (for example on permutations longer than four
+      // dimensions); hand its status back instead of reporting success.
+      return TransposeEvalInt8(context, node);
+    default:
+      MicroPrintf(
+          "Type %s is currently not supported by Transpose. "
+          "Only float32 and int8 is supported",
+          TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+// Builds the registration for TRANSPOSE: no init function, shared
+// TransposePrepare, and the type-dispatching TransposeEval.
+TFLMRegistration Register_TRANSPOSE() {
+  TFLMRegistration registration =
+      tflite::micro::RegisterOp(nullptr, TransposePrepare, TransposeEval);
+  return registration;
+}
+// Builds an int8-only TRANSPOSE registration that evaluates with
+// TransposeEvalInt8 directly, bypassing the type dispatch in TransposeEval.
+TFLMRegistration Register_TRANSPOSE_INT8() {
+  TFLMRegistration registration =
+      tflite::micro::RegisterOp(nullptr, TransposePrepare, TransposeEvalInt8);
+  return registration;
+}
+
+}  // namespace tflite