Bluestein Algorithm #140

Closed. Wants to merge 76 commits.

Commits (76)

6a6672f
initial factorization changes
AD2605 Nov 27, 2023
a0b6218
passing vector to contain l2 events in global level FFTs
AD2605 Nov 28, 2023
d6dff45
passing pre-allocated vector to transpose_level to collect llc events
AD2605 Nov 28, 2023
74c8c80
passing dimension_struct instead of kernel_data_struct
AD2605 Nov 29, 2023
386c98f
further changes to descriptor
AD2605 Nov 30, 2023
e4c8d27
Merge remote-tracking branch 'origin/atharva/refactor' into atharva/b…
AD2605 Nov 30, 2023
6236bba
WIP
AD2605 Dec 7, 2023
dbc0919
correctly populating factors and sub batches vectors for backward fac…
AD2605 Dec 11, 2023
8de1139
update setting of specialization constants
AD2605 Dec 12, 2023
69f674e
added host naive_dft and generation of chirp signal
AD2605 Dec 12, 2023
0f7f7cb
further changes and resolved all warnings
AD2605 Dec 12, 2023
308351d
populate bluestein specific twiddles in device pointer
AD2605 Dec 12, 2023
8d506bd
fixed issue in setting dimension size
AD2605 Dec 14, 2023
b530624
added backward pass required for bluestein
AD2605 Dec 14, 2023
2d3c88a
added prime sized tests
AD2605 Dec 14, 2023
67c3c7f
fix compilation issues
AD2605 Dec 20, 2023
b23be66
add copy function in global dispatcher
AD2605 Dec 20, 2023
4a92d8c
enable taking conjugate before and after fft compute
AD2605 Dec 21, 2023
651edd1
fix compilation issues
AD2605 Dec 21, 2023
9fd1566
refactor repetitive conjugate snippet into a utility function
AD2605 Jan 3, 2024
c4e6549
ignore templated lambda C++20 warning (for now)
AD2605 Jan 3, 2024
3f982e7
Merge remote-tracking branch 'origin/main' into atharva/bluestein
AD2605 Jan 8, 2024
ea1995e
added event dependencies for copy before and after compute for prime …
AD2605 Jan 9, 2024
c56475f
fix bugs in prepare_implementation and build_w_spec_const
AD2605 Jan 9, 2024
7e6dd9e
add option to increment store modifier pointer
AD2605 Jan 9, 2024
78e1372
remove readability-magic-numbers from clang-tidy
AD2605 Jan 9, 2024
730fecc
further changes in global's calculate_twiddles_struct to accommodate b…
AD2605 Jan 11, 2024
1be4afc
fix compilation and warning
AD2605 Jan 12, 2024
4cf6ca7
updated apply_modifier in workitem impl after layout changes
AD2605 Jan 12, 2024
935477d
Revert "updated apply_modifier in workitem impl after layout changes"
AD2605 Jan 12, 2024
eae3341
bugfixes in calculate_twiddles and ensure coalesced accesses in worki…
AD2605 Jan 15, 2024
1861125
pass load modifier pointer and loc memory to launch_kernel and dispat…
AD2605 Jan 16, 2024
63dd473
add option for subgroup loads in apply_modifier
AD2605 Jan 16, 2024
9ea81fe
transpose load modifiers if required
AD2605 Jan 16, 2024
2d70170
setting of increment store pointer spec const and using less memory i…
AD2605 Jan 16, 2024
a07ec9c
updated doxygens and descriptions
AD2605 Jan 16, 2024
8a8371d
remove direction
AD2605 Jan 17, 2024
310945f
fix compilation errors
AD2605 Jan 17, 2024
c094e4d
correctly set conjugate spec constants for global impl, and workgroup…
AD2605 Jan 18, 2024
12e618f
set scale_factor as spec constant
AD2605 Jan 18, 2024
39d1632
remove scale_factor from friend declaration
AD2605 Jan 18, 2024
a152e0d
resolve warnings and add TODO
AD2605 Jan 18, 2024
80167b5
set scale factor only for the last kernel
AD2605 Jan 18, 2024
155d417
fix scaled tests
AD2605 Jan 18, 2024
b40a91f
change push_back to emplace_back
AD2605 Jan 19, 2024
c935e73
Merge remote-tracking branch 'origin/main' into atharva/spec_const_di…
AD2605 Jan 19, 2024
5976144
fix compilation failure with multiple subgroup sizes
AD2605 Jan 19, 2024
93ab8a4
add batch one to offset tests
AD2605 Jan 19, 2024
6fcb7d4
set strides, distance and offset on the basis of direction
AD2605 Jan 19, 2024
e4167ae
review comments 1
AD2605 Jan 19, 2024
d78ac6f
Merge remote-tracking branch 'origin/atharva/spec_const_dir_scale' in…
AD2605 Jan 21, 2024
5fe5eaa
renamed all take_conjugate_* to conjugate_*
AD2605 Jan 23, 2024
8ea2076
review comments 2
AD2605 Jan 23, 2024
a7034bb
Update description of compute direction
AD2605 Jan 23, 2024
3910c38
added get_spect_constant_scale
AD2605 Jan 23, 2024
681c587
Merge remote-tracking branch 'origin/atharva/spec_const_dir_scale' in…
AD2605 Jan 23, 2024
7c8b0c0
add dimension_struct back as a parameter to calculate_twiddles_struct
AD2605 Jan 23, 2024
e1050c1
make get_local_memory_usage a member function
AD2605 Jan 23, 2024
ff650b5
Merge remote-tracking branch 'origin/atharva/spec_const_dir_scale' in…
AD2605 Jan 23, 2024
a57ada8
doxygen fixes
t4c1 Feb 2, 2024
01cd4d4
adding back bluestein parts bit by bit
AD2605 Feb 2, 2024
32b5582
Merge remote-tracking branch 'origin/doxy_fix' into atharva/bluestein
AD2605 Feb 2, 2024
c6663d8
reimplement bluestein
AD2605 Feb 6, 2024
aed81cf
remove check for usm support
AD2605 Feb 6, 2024
fbe0b3d
Merge remote-tracking branch 'origin/main' into atharva/bluestein
AD2605 Feb 6, 2024
d96a158
refactoring changes
AD2605 Feb 7, 2024
4976174
doxygen fixes
AD2605 Feb 7, 2024
b135650
Merge remote-tracking branch 'origin/main' into atharva/bluestein
AD2605 Feb 13, 2024
ef62c2f
formatting changes
AD2605 Feb 13, 2024
fca1a80
fix compilation 1
AD2605 Feb 14, 2024
6f29039
warnings and compilation 2
AD2605 Feb 14, 2024
c885293
fix all compilation errors
AD2605 Feb 14, 2024
6be4c6e
removed local memory usage for store modifiers
AD2605 Feb 25, 2024
7a2afe8
added global2global copy kernel
AD2605 Feb 26, 2024
75f55d9
review comments 1
AD2605 Feb 27, 2024
b3a1cd7
added some comments
AD2605 Feb 28, 2024

.clang-tidy: 1 change (1 addition, 0 deletions)

@@ -14,6 +14,7 @@ Checks: >
performance-*,
-performance-avoid-endl,
readability-*,
-readability-magic-numbers,
Contributor:

What magic numbers are being complained about?

Contributor Author:

The 2.0 here. I do not particularly want to name it.

Contributor:

I think you should disable the check around that one line (stackoverflow).

This might alternatively be fixed by using log2 with ints, as suggested in another comment.
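
For reference, a rough sketch of both options mentioned above (the `2.0` usage in question is not shown in this diff; the function names are illustrative, and `std::bit_width` needs C++20):

```cpp
#include <bit>
#include <cmath>
#include <cstddef>

// Option 1: keep the constant but silence the check on that one line only.
// NOLINTNEXTLINE(readability-magic-numbers)
inline double float_log2(double x) { return std::log(x) / std::log(2.0); }

// Option 2: avoid the floating-point constant entirely with an integer log2
// (requires n > 0; std::bit_width is C++20, a shift loop works on older standards).
constexpr std::size_t int_log2(std::size_t n) { return std::bit_width(n) - 1; }
```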

-readability-function-cognitive-complexity,
-readability-identifier-length,
-readability-named-parameter,

README.md: 6 changes (2 additions, 4 deletions)

@@ -98,10 +98,8 @@ portFFT is still in early development. The supported configurations are:
* Arbitrary forward and backward offsets
* Arbitrary strides and distance where the problem size + auxiliary data fits in the registers of a single work-item.

-Any 1D arbitrarily large input size that fits in global memory is supported, with a restriction that large input sizes should not have large prime factors.
-The largest prime factor depend on the device and the values set by `PORTFFT_REGISTERS_PER_WI` and `PORTFFT_SUBGROUP_SIZES`.
-For instance with `PORTFFT_REGISTERS_PER_WI` set to `128` (resp. `256`) each work-item can hold a maximum of 27 (resp. 56) complex values, thus with `PORTFFT_SUBGROUP_SIZES` set to `32` the largest prime factor cannot exceed `27*32=864` (resp. `56*32=1792`).
-portFFT may allocate up to `2 * PORTFFT_MAX_CONCURRENT_KERNELS * input_size` scratch memory, depending on the configuration passed.
+Any 1D arbitrarily large input size that fits in global memory is supported.
+portFFT may allocate up to `2 * PORTFFT_MAX_CONCURRENT_KERNELS * input_size` scratch memory, in addition to memory allocated to hold precomputed values to be used during compute, depending on the configuration passed.

Any batch size is supported as long as the input and output data fits in global memory.

src/portfft/committed_descriptor_impl.hpp: 613 changes (375 additions, 238 deletions). Large diff not shown.

src/portfft/common/bluestein.hpp: 76 changes (76 additions, 0 deletions)

@@ -0,0 +1,76 @@
/***************************************************************************
*
* Copyright (C) Codeplay Software Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Codeplay's portFFT
*
**************************************************************************/

#ifndef PORTFFT_COMMON_BLUESTEIN_HPP
#define PORTFFT_COMMON_BLUESTEIN_HPP

#include "portfft/common/host_fft.hpp"
#include "portfft/defines.hpp"

#include <cmath>
#include <complex>
#include <cstddef>
#include <cstring>
#include <vector>
#include <sycl/sycl.hpp>

namespace portfft {
namespace detail {
/**
* Utility function to get the DFT of the chirp signal
* @tparam T Scalar Type
* @param ptr Host Pointer containing the load/store modifiers.
Contributor (suggested change):

- * @param ptr Host Pointer containing the load/store modifiers.
+ * @param store_modifiers Host pointer containing the load/store modifiers.

* @param committed_size original problem size
Contributor:

What is the original problem?

* @param dimension_size padded size
Contributor:

If padded size is the best description for dimension_size, why isn't the variable called padded_size?

Contributor:

Whichever name is better, this documentation should be improved.

*/
template <typename T>
void get_fft_chirp_signal(T* ptr, std::size_t committed_size, std::size_t dimension_size) {
using complex_t = std::complex<T>;
std::vector<complex_t> chirp_signal(dimension_size, 0);
std::vector<complex_t> chirp_fft(dimension_size, 0);
for (std::size_t i = 0; i < committed_size; i++) {
double theta = M_PI * static_cast<double>(i * i) / static_cast<double>(committed_size);
Contributor:

M_PI is not standard C++.
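
For reference, a minimal portable alternative (a sketch; assumes C++20 `<numbers>` is available, otherwise a locally defined constant works):

```cpp
#include <numbers>

// Standard, typed pi constant; can replace M_PI in the theta computations in this file.
constexpr double pi = std::numbers::pi_v<double>;

// Pre-C++20 fallback:
// constexpr double pi = 3.141592653589793238462643383279502884;
```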

chirp_signal[i] = complex_t(static_cast<T>(std::cos(theta)), static_cast<T>(std::sin(theta)));
}
std::size_t num_zeros = dimension_size - 2 * committed_size + 1;
for (std::size_t i = 0; i < committed_size; i++) {
chirp_signal[committed_size + num_zeros + i - 1] = chirp_signal[committed_size - i];
}
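// Worked example of the wrap-around above: with committed_size N = 3 and dimension_size M = 8,
// num_zeros = 8 - 2*3 + 1 = 3, so indices 5, 6, 7 receive chirp_signal[3] (still zero),
// chirp_signal[2] and chirp_signal[1], i.e. b[M - k] = w[k] for k = 1..N-1, which is the
// circular-convolution kernel required by Bluestein's algorithm.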
host_naive_dft(chirp_signal.data(), chirp_fft.data(), dimension_size);
std::memcpy(ptr, reinterpret_cast<T*>(chirp_fft.data()), 2 * dimension_size * sizeof(T));
}

/**
* Populates the input modifiers required for Bluestein's algorithm
* @tparam T Scalar Type
* @param ptr Host Pointer containing the load/store modifiers.
* @param committed_size committed problem length
* @param dimension_size padded dft length
*/
template <typename T>
void populate_bluestein_input_modifiers(T* ptr, std::size_t committed_size, std::size_t dimension_size) {
using complex_t = std::complex<T>;
std::vector<complex_t> scratch(dimension_size, 0);
for (std::size_t i = 0; i < committed_size; i++) {
double theta = -M_PI * static_cast<double>(i * i) / static_cast<double>(committed_size);
scratch[i] = complex_t(static_cast<T>(std::cos(theta)), static_cast<T>(std::sin(theta)));
}
std::memcpy(ptr, reinterpret_cast<T*>(scratch.data()), 2 * dimension_size * sizeof(T));
}
} // namespace detail
} // namespace portfft

#endif
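
For context, `host_naive_dft` comes from `portfft/common/host_fft.hpp` and is not part of this diff. A minimal sketch of an O(N^2) reference DFT with the call shape used above (the real implementation's signature and sign convention may differ; `T` is the scalar type as in the file):

```cpp
#include <cmath>
#include <complex>
#include <cstddef>

// Hypothetical reference DFT: out[k] = sum_n in[n] * exp(-2*pi*i*n*k / fft_size).
template <typename T>
void host_naive_dft(const std::complex<T>* in, std::complex<T>* out, std::size_t fft_size) {
  constexpr double pi = 3.141592653589793238462643383279502884;
  for (std::size_t k = 0; k < fft_size; k++) {
    std::complex<double> acc{0.0, 0.0};
    for (std::size_t n = 0; n < fft_size; n++) {
      double theta = -2.0 * pi * static_cast<double>(n * k) / static_cast<double>(fft_size);
      acc += std::complex<double>(in[n].real(), in[n].imag()) *
             std::complex<double>(std::cos(theta), std::sin(theta));
    }
    out[k] = std::complex<T>(static_cast<T>(acc.real()), static_cast<T>(acc.imag()));
  }
}
```

Taken together these match the standard Bluestein decomposition: the input is multiplied by w[n] = exp(-i*pi*n^2/N) (the factors from `populate_bluestein_input_modifiers`), convolved with the zero-padded conjugate chirp whose DFT `get_fft_chirp_signal` precomputes, and the result is multiplied by w[k] again to give the length-`committed_size` DFT.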