Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b4ebfee
VACUUM - WIP - removing green_only and adding a galerkin projection b…
jhalpern30 Mar 13, 2026
388240d
VACUUM - WIP - adding rough profiling for the code
jhalpern30 Mar 13, 2026
7733df9
VACUUM - WIP - fused kernel operations so full matrix is never stored…
jhalpern30 Mar 13, 2026
b70a4f1
Merge branch 'develop' of github.com:OpenFUSIONToolkit/GPEC into jmh/…
jhalpern30 Mar 13, 2026
2e1e728
VACUUM - WIP - condensing tri operations into a single complex operat…
jhalpern30 Mar 14, 2026
feb07e4
VACUUM - WIP - combining galerkin and fused galerkin into one main wi…
jhalpern30 Mar 14, 2026
0e3b7e5
VACUUM - WIP - consolidating cos/sin_mn_basis into exp_mn_basis and p…
jhalpern30 Mar 14, 2026
c29d416
VACUUM - WIP - wall implementation of the galerkin method (working fo…
jhalpern30 Mar 14, 2026
98851de
VACUUM - WIP - renaming matrices
jhalpern30 Mar 16, 2026
d872ce9
VACUUM - WIP - removing the non-fused galerkin method
jhalpern30 Mar 16, 2026
eb0e5f7
temp
jhalpern30 Mar 16, 2026
afbc933
VACUUM - WIP - modifying kernels to take views of larger K and G matr…
jhalpern30 Mar 16, 2026
4925412
VACUUM - WIP - consolidating nowall and wall into one branch for gale…
jhalpern30 Mar 16, 2026
8bda420
VACUUM - WIP - optimizing the fused Galerkin code (mostly 3D benefits)
jhalpern30 Mar 16, 2026
b185e95
VACUUM - WIP - removing timers, adding benchmark
jhalpern30 Mar 16, 2026
0c5ef1c
VACUUM - WIP - simplifying some math in Free.jl
jhalpern30 Mar 16, 2026
8c2a78a
EXAMPLES - IMPROVEMENT - small cleanups to example tomls
jhalpern30 Mar 17, 2026
52cceab
VACUUM - WIP - merging the projected kernel functions into the regula…
jhalpern30 Mar 17, 2026
f96030c
VACUUM - IMPROVEMENT - full implementation of the projected kernel in…
jhalpern30 Mar 17, 2026
8454255
VACUUM - IMPROVEMENT - updating the main docstring
jhalpern30 Mar 17, 2026
c719a6b
VACUUM - IMPROVEMENT - adding back in the logic that only allocates the…
jhalpern30 Mar 17, 2026
4c001f4
VACUUM - IMPROVEMENT - using a vector for the diagonal of the gram ma…
jhalpern30 Mar 18, 2026
46f24d9
VACUUM - IMPROVEMENT - merging the single and double layer kernels to…
jhalpern30 Mar 18, 2026
c85493e
VACUUM - IMPROVEMENT - combining kernels into one operation, reduces …
jhalpern30 Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions benchmarks/benchmark_fourier_transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,21 @@ function extract_modes(fft_result, mlow, mhigh, mtheta)
for (i, m) in enumerate(mlow:mhigh)
if m >= 0
# Positive frequencies
modes[i] = fft_result[m + 1] / mtheta # FFT normalization
modes[i] = fft_result[m+1] / mtheta # FFT normalization
else
# Negative frequencies (wrap around)
modes[i] = fft_result[mtheta + m + 1] / mtheta
modes[i] = fft_result[mtheta+m+1] / mtheta
end
end
return modes
end

# Test configurations
test_cases = [
(name="Small (mtheta=128, mpert=10)", mtheta=128, mpert=10, mlow=-5),
(name="Medium (mtheta=256, mpert=20)", mtheta=256, mpert=20, mlow=-10),
(name="Large (mtheta=480, mpert=40)", mtheta=480, mpert=40, mlow=-20),
(name="Very Large (mtheta=1024, mpert=80)", mtheta=1024, mpert=80, mlow=-40),
(name="Small (mtheta=128, mpert=10)", mtheta=128, mpert=10, mlow=-5),
(name="Medium (mtheta=256, mpert=20)", mtheta=256, mpert=20, mlow=-10),
(name="Large (mtheta=480, mpert=40)", mtheta=480, mpert=40, mlow=-20),
(name="Very Large (mtheta=1024, mpert=80)", mtheta=1024, mpert=80, mlow=-40)
]

for test in test_cases
Expand All @@ -56,7 +56,7 @@ for test in test_cases
mhigh = mlow + mpert - 1

# Create test data
theta = range(0, 2π, length=mtheta+1)[1:end-1]
theta = range(0, 2π; length=mtheta+1)[1:(end-1)]
data = sin.(3 .* theta) .+ 0.5 .* cos.(7 .* theta) .+ 0.2 .* sin.(11 .* theta)

# Initialize FourierTransform
Expand All @@ -67,7 +67,8 @@ for test in test_cases
theta_buffer = zeros(ComplexF64, mtheta)

# Pre-allocate for low-level API
cslth, snlth = compute_fourier_coefficients(mtheta, mpert, mlow)
exp_mn_basis = compute_fourier_coefficients(mtheta, mpert, mlow)
cslth, snlth = real(exp_mn_basis), imag(exp_mn_basis)
gij = reshape(data, mtheta, 1) # Matrix form
gil = zeros(Float64, mtheta, mpert)

Expand Down Expand Up @@ -99,7 +100,7 @@ for test in test_cases
# Note: Our transform uses a different normalization and basis
println("\n--- Accuracy Check ---")
println("FourierTransform allocating vs in-place: ",
@sprintf("%.2e", maximum(abs.(modes_alloc .- modes_buffer))))
@sprintf("%.2e", maximum(abs.(modes_alloc .- modes_buffer))))

# Compare magnitudes of modes (since basis might differ)
println("Mode magnitudes comparison (FourierTransform vs FFTW):")
Expand Down Expand Up @@ -129,9 +130,9 @@ for test in test_cases
full_modes = zeros(ComplexF64, mtheta)
for (i, m) in enumerate(mlow:mhigh)
if m >= 0
full_modes[m + 1] = modes_test[i]
full_modes[m+1] = modes_test[i]
else
full_modes[mtheta + m + 1] = modes_test[i]
full_modes[mtheta+m+1] = modes_test[i]
end
end
t6 = @benchmark ifft($full_modes)
Expand All @@ -140,21 +141,21 @@ for test in test_cases
# Accuracy check
println("\n--- Inverse Accuracy Check ---")
println("inverse() allocating vs in-place: ",
@sprintf("%.2e", maximum(abs.(theta_alloc .- theta_buffer))))
@sprintf("%.2e", maximum(abs.(theta_alloc .- theta_buffer))))
println("Round-trip error (real part): ",
@sprintf("%.2e", maximum(abs.(real.(theta_alloc) .- data))))
@sprintf("%.2e", maximum(abs.(real.(theta_alloc) .- data))))

# Performance summary
println("\n--- Performance Summary ---")
println(@sprintf("Forward transform speedup (in-place vs allocating): %.2fx",
median(t1).time / median(t2).time))
median(t1).time / median(t2).time))
println(@sprintf("Allocations eliminated: %d → %d",
t1.allocs, t2.allocs))
t1.allocs, t2.allocs))

# Compare to FFTW
println(@sprintf("\nFourier vs FFTW (forward): %.2fx %s",
abs(median(t2).time / median(t3).time),
median(t2).time < median(t3).time ? "faster" : "slower"))
abs(median(t2).time / median(t3).time),
median(t2).time < median(t3).time ? "faster" : "slower"))
println("Note: FFTW computes full DFT (all N modes), we compute truncated series ($mpert modes)")
end

Expand All @@ -169,7 +170,7 @@ mlow = -10
nbatch = 10 # Transform 10 functions simultaneously

ft = FourierTransform(mtheta, mpert, mlow)
theta = range(0, 2π, length=mtheta+1)[1:end-1]
theta = range(0, 2π; length=mtheta+1)[1:(end-1)]

# Create batch data
data_matrix = zeros(Float64, mtheta, nbatch)
Expand All @@ -182,7 +183,7 @@ modes_matrix = zeros(ComplexF64, mpert, nbatch)
println("\nTransforming $nbatch functions of length $mtheta:")

print("Allocating (loop): ")
@btime for i in 1:$nbatch
@btime for i in 1:($nbatch)
modes = $ft($data_matrix[:, i])
end

Expand All @@ -191,7 +192,7 @@ print("Allocating (matrix):")

print("In-place (loop): ")
modes_buffer = zeros(ComplexF64, mpert)
@btime for i in 1:$nbatch
@btime for i in 1:($nbatch)
transform!($modes_buffer, $ft, $data_matrix[:, i])
end

Expand All @@ -208,7 +209,8 @@ mpert = 10
mlow = 1

# Setup for low-level API
cslth, snlth = compute_fourier_coefficients(mtheta, mpert, mlow)
exp_mn_basis = compute_fourier_coefficients(mtheta, mpert, mlow)
cslth, snlth = real(exp_mn_basis), imag(exp_mn_basis)
gij = randn(mtheta, mtheta) # Green's function matrix
gil = zeros(Float64, mtheta, mpert)

Expand Down
Loading
Loading