Skip to content

Commit d82ccfe

Browse files
sarahmscroggstbetcke
authored
vectorize helmholtz_3d (#10)
* check dependency versions (#7) * check dependency versions * version++ * add release instructions (#8) * add -dev to version (#9) * add -dev to version * tweak release notes * vectorize helmholtz_3d --------- Co-authored-by: Matthew Scroggs <[email protected]> Co-authored-by: sarah <> Co-authored-by: Timo Betcke <[email protected]>
1 parent 6803224 commit d82ccfe

File tree

5 files changed

+1118
-32
lines changed

5 files changed

+1118
-32
lines changed

.github/workflows/run-tests.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,22 @@ jobs:
3636
run: cargo test --examples --release ${{ matrix.feature-flags }}
3737
- name: Test benchmarks build
3838
run: cargo bench --no-run --features "strict"
39+
40+
check-dependencies:
41+
name: Check dependencies
42+
runs-on: ubuntu-latest
43+
strategy:
44+
matrix:
45+
rust-version: ["stable"]
46+
steps:
47+
- name: Set up Rust
48+
uses: actions-rust-lang/setup-rust-toolchain@v1
49+
with:
50+
toolchain: ${{ matrix.rust-version }}
51+
components: rustfmt
52+
- name: Install cargo-upgrades
53+
run: cargo install cargo-upgrades
54+
- uses: actions/checkout@v3
55+
- name: Check that dependencies are up to date
56+
run:
57+
cargo upgrades

Cargo.toml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
[features]
2+
nightly = ["pulp/nightly"]
23
# Treat warnings as a build error.
34
strict = []
45

56
[package]
67
name = "green-kernels"
7-
version = "0.1.0"
8+
version = "0.1.0-dev"
89
edition = "2021"
910
authors = ["Timo Betcke <[email protected]>", "Matthew Scroggs <[email protected]>"]
1011
description = "Evaluation of Green's function kernels."
@@ -28,9 +29,11 @@ num = "0.4"
2829
num_cpus = "1"
2930
rlst = { git = "https://github.com/linalg-rs/rlst.git" }
3031
rand = "0.8.5"
31-
itertools = "0.12"
32-
pulp = "0.18.12"
33-
coe-rs = "0.1"
32+
itertools = { version = "0.12.1", default-features = false }
33+
coe-rs = "0.1.2"
34+
pulp = { version = "0.18.12" }
35+
bytemuck = "1.16.0"
36+
hexf = "0.2.1"
3437

3538
[dev-dependencies]
3639
criterion = { version = "0.3", features = ["html_reports"] }
@@ -43,12 +46,12 @@ cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
4346
wildcard_imports = "forbid"
4447

4548
[target.aarch64-apple-darwin.dev-dependencies]
46-
blas-src = { version = "0.9", features = ["accelerate"]}
47-
lapack-src = { version = "0.9", features = ["accelerate"]}
49+
blas-src = { version = "0.10", features = ["accelerate"]}
50+
lapack-src = { version = "0.10", features = ["accelerate"]}
4851

4952
[target.x86_64-unknown-linux-gnu.dev-dependencies]
50-
blas-src = { version = "0.9", features = ["blis"]}
51-
lapack-src = { version = "0.9", features = ["netlib"]}
53+
blas-src = { version = "0.10", features = ["blis"]}
54+
lapack-src = { version = "0.10", features = ["netlib"]}
5255

5356
[[bench]]
5457
name = "laplace_f32"

RELEASE.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Making a release
2+
3+
To make a new release of green-kernels, follow these steps:
4+
5+
0) If you have not yet made a release from your current computer, run `cargo login` and copy an API
6+
key from https://crates.io/me
7+
8+
1) Check out the branch `release` and merge the branch `main` into it:
9+
```bash
10+
git checkout release
11+
git merge main
12+
```
13+
14+
2) Update the version number in `Cargo.toml`.
15+
The version numbers have the format `[x].[y].[z]`. If you are releasing a major
16+
version, you should increment `[x]` and set `[y]` and `[z]` to zero.
17+
If you are releasing a minor version, you should increment `[y]` and set `[z]`
18+
to zero. If you are releasing a bugfix, you should increment `[z]`.
19+
20+
3) In `Cargo.toml`, check that the `rlst` dependency is at the latest version.
21+
22+
4) Commit your changes and push to GitHub, and check that all the tests on CI pass.
23+
24+
5) [Create a release on GitHub](https://github.com/bempp/green-kernels/releases/new) from the `release` branch.
25+
The release tag and title should be `v[x].[y].[z]` (where `[x]`, `[y]` and `[z]` are as in step 2).
26+
In the "Describe this release" box, you should bullet point the main changes since the last
27+
release.
28+
29+
6) Run `cargo publish --dry-run`, then run `cargo package --list` and
30+
check that no unwanted extras have been included in the release.
31+
32+
7) If everything is working as expected, run `cargo publish`. This will push the new version to
33+
crates.io. Note: this cannot be undone, but you can use `cargo yank` to mark a version as
34+
unsuitable for use.
35+
36+
8) Open a pull request to `main` to update the version number in `Cargo.toml` to `[x].[y].[z]-dev`.

src/helmholtz_3d.rs

Lines changed: 156 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,15 @@ use crate::helpers::{
44
};
55
use crate::traits::Kernel;
66
use crate::types::EvalType;
7+
use crate::RealScalar;
8+
use crate::{ComplexScalar, SimdFor};
79
use num::traits::FloatConst;
10+
use num::One;
811
use num::Zero;
12+
use pulp::Simd;
913
use rayon::prelude::*;
14+
use rlst::c32;
15+
use rlst::c64;
1016
use rlst::RlstScalar;
1117
use std::marker::PhantomData;
1218

@@ -251,8 +257,8 @@ where
251257
/// Evaluate Helmholtz kernel for one target
252258
pub fn evaluate_helmholtz_one_target<T: RlstScalar<Complex = T>>(
253259
eval_type: EvalType,
254-
target: &[<T as RlstScalar>::Real],
255-
sources: &[<T as RlstScalar>::Real],
260+
target: &[T::Real],
261+
sources: &[T::Real],
256262
charges: &[T],
257263
wavenumber: T::Real,
258264
result: &mut [T],
@@ -273,33 +279,159 @@ pub fn evaluate_helmholtz_one_target<T: RlstScalar<Complex = T>>(
273279

274280
match eval_type {
275281
EvalType::Value => {
276-
let mut my_result_real = <<T as RlstScalar>::Real as Zero>::zero();
277-
let mut my_result_imag = <<T as RlstScalar>::Real as Zero>::zero();
278-
for index in 0..nsources {
279-
diff0 = sources0[index] - target[0];
280-
diff1 = sources1[index] - target[1];
281-
diff2 = sources2[index] - target[2];
282-
let diff_norm = (diff0 * diff0 + diff1 * diff1 + diff2 * diff2).sqrt();
283-
let inv_diff_norm = {
284-
if diff_norm == zero_real {
285-
zero_real
286-
} else {
287-
one_real / diff_norm
282+
struct Impl<'a, T: RlstScalar> {
283+
wavenumber: T::Real,
284+
t0: T::Real,
285+
t1: T::Real,
286+
t2: T::Real,
287+
288+
sources0: &'a [T::Real],
289+
sources1: &'a [T::Real],
290+
sources2: &'a [T::Real],
291+
charges: &'a [T],
292+
}
293+
294+
impl<T: ComplexScalar> pulp::WithSimd for Impl<'_, T> {
295+
type Output = (T::Real, T::Real);
296+
297+
#[inline(always)]
298+
fn with_simd<S: pulp::Simd>(self, simd: S) -> Self::Output {
299+
use coe::Coerce;
300+
301+
let Self {
302+
wavenumber,
303+
t0,
304+
t1,
305+
t2,
306+
sources0,
307+
sources1,
308+
sources2,
309+
charges,
310+
} = self;
311+
312+
let (s0_head, s0_tail) = T::Real::as_simd_slice::<S>(sources0);
313+
let (s1_head, s1_tail) = T::Real::as_simd_slice::<S>(sources1);
314+
let (s2_head, s2_tail) = T::Real::as_simd_slice::<S>(sources2);
315+
316+
let len = s0_head.len();
317+
let n = std::mem::size_of::<<T::Real as RealScalar>::Scalars<S>>()
318+
/ std::mem::size_of::<T::Real>();
319+
let (c_head, c_tail) = charges.split_at(len * n);
320+
let c_head: &[[<T::Real as RealScalar>::Scalars<S>; 2]] =
321+
bytemuck::cast_slice(c_head);
322+
let c_tail: &[[T::Real; 2]] = bytemuck::cast_slice(c_tail);
323+
324+
#[inline(always)]
325+
fn impl_slice<T: ComplexScalar, S: Simd>(
326+
simd: S,
327+
wavenumber: T::Real,
328+
t0: T::Real,
329+
t1: T::Real,
330+
t2: T::Real,
331+
332+
sources0: &[<T::Real as RealScalar>::Scalars<S>],
333+
sources1: &[<T::Real as RealScalar>::Scalars<S>],
334+
sources2: &[<T::Real as RealScalar>::Scalars<S>],
335+
charges: &[[<T::Real as RealScalar>::Scalars<S>; 2]],
336+
) -> (T::Real, T::Real) {
337+
let simd = SimdFor::<T::Real, S>::new(simd);
338+
339+
let t0 = simd.splat(t0);
340+
let t1 = simd.splat(t1);
341+
let t2 = simd.splat(t2);
342+
let zero = simd.splat(T::Real::zero());
343+
let wavenumber = simd.splat(wavenumber);
344+
let mut acc_re = simd.splat(T::Real::zero());
345+
let mut acc_im = simd.splat(T::Real::zero());
346+
347+
for (&s0, &s1, &s2, &c) in
348+
itertools::izip!(sources0, sources1, sources2, charges)
349+
{
350+
let [c_re, c_im] = simd.deinterleave(c);
351+
352+
let diff0 = simd.sub(s0, t0);
353+
let diff1 = simd.sub(s1, t1);
354+
let diff2 = simd.sub(s2, t2);
355+
356+
let diff_norm = simd.sqrt(simd.mul_add(
357+
diff0,
358+
diff0,
359+
simd.mul_add(diff1, diff1, simd.mul(diff2, diff2)),
360+
));
361+
362+
let is_zero = simd.cmp_eq(diff_norm, zero);
363+
let inv_diff_norm = simd.select(
364+
is_zero,
365+
zero,
366+
simd.div(simd.splat(T::Real::one()), diff_norm),
367+
);
368+
let kr = simd.mul(wavenumber, diff_norm);
369+
370+
let (g_re, g_im) = {
371+
let (s, c) = simd.sin_cos(kr);
372+
(simd.mul(c, inv_diff_norm), simd.mul(s, inv_diff_norm))
373+
};
374+
375+
acc_re = simd.mul_add(
376+
g_re,
377+
c_re,
378+
simd.mul_add(simd.neg(g_im), c_im, acc_re),
379+
);
380+
acc_im = simd.mul_add(g_re, c_im, simd.mul_add(g_im, c_re, acc_im));
381+
}
382+
(simd.reduce_add(acc_re), simd.reduce_add(acc_im))
288383
}
289-
};
290384

291-
let kr = wavenumber * diff_norm;
385+
let (re0, im0) = impl_slice::<T, S>(
386+
simd, wavenumber, t0, t1, t2, s0_head, s1_head, s2_head, c_head,
387+
);
388+
let (re1, im1) = impl_slice::<T, pulp::Scalar>(
389+
pulp::Scalar::new(),
390+
wavenumber,
391+
t0,
392+
t1,
393+
t2,
394+
s0_tail.coerce(),
395+
s1_tail.coerce(),
396+
s2_tail.coerce(),
397+
c_tail.coerce(),
398+
);
292399

293-
let g_re = <T::Real as RlstScalar>::cos(kr) * inv_diff_norm;
294-
let g_im = <T::Real as RlstScalar>::sin(kr) * inv_diff_norm;
295-
let charge_re = charges[index].re();
296-
let charge_im = charges[index].im();
400+
(re0 + re1, im0 + im1)
401+
}
402+
}
297403

298-
my_result_imag += g_re * charge_im + g_im * charge_re;
299-
my_result_real += g_re * charge_re - g_im * charge_im;
404+
use coe::coerce_static as to;
405+
use coe::Coerce;
406+
if coe::is_same::<T, c32>() {
407+
let (re, im) = pulp::Arch::new().dispatch(Impl::<'_, c32> {
408+
wavenumber: to(wavenumber),
409+
t0: to(target[0]),
410+
t1: to(target[1]),
411+
t2: to(target[2]),
412+
sources0: sources0.coerce(),
413+
sources1: sources1.coerce(),
414+
sources2: sources2.coerce(),
415+
charges: charges.coerce(),
416+
});
417+
result[0] += T::Complex::complex(to::<_, T::Real>(re), to::<_, T::Real>(im))
418+
.mul_real(m_inv_4pi);
419+
} else if coe::is_same::<T, c64>() {
420+
let (re, im) = pulp::Arch::new().dispatch(Impl::<'_, c64> {
421+
wavenumber: to(wavenumber),
422+
t0: to(target[0]),
423+
t1: to(target[1]),
424+
t2: to(target[2]),
425+
sources0: sources0.coerce(),
426+
sources1: sources1.coerce(),
427+
sources2: sources2.coerce(),
428+
charges: charges.coerce(),
429+
});
430+
result[0] += T::Complex::complex(to::<_, T::Real>(re), to::<_, T::Real>(im))
431+
.mul_real(m_inv_4pi);
432+
} else {
433+
panic!()
300434
}
301-
result[0] += <T::Complex as RlstScalar>::complex(my_result_real, my_result_imag)
302-
.mul_real(m_inv_4pi);
303435
}
304436
EvalType::ValueDeriv => {
305437
// Cannot simply use an array my_result as this is not

0 commit comments

Comments
 (0)