Use new MPI custom ops in MPI reduce #2066

Draft · wants to merge 22 commits into base: main
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
@@ -90,8 +90,8 @@ jobs:
     arch: x64
     trixi_test: threaded_legacy
   - version: '1.10'
-    os: macos-13
-    arch: x64
+    os: macos-latest
+    arch: aarch64
     trixi_test: mpi
   - version: '1.10'
     os: macos-latest
@@ -113,6 +113,7 @@ jobs:
       arch: ${{ matrix.arch }}
   - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
   - uses: julia-actions/cache@v2
+  - run: julia --project=@. -e 'using Pkg; pkg"add MPI#1dc1740"'
   - uses: julia-actions/julia-buildpkg@v1
     env:
       PYTHON: ""
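The new workflow step pins MPI.jl to an unreleased revision that provides the custom-operator support used below. To reproduce that environment locally, an equivalent Pkg call is sketched here; the revision hash is taken from the workflow line above, and the exact API surface of that revision should be treated as an assumption until it is released:

using Pkg

# Pin MPI.jl to the same unreleased revision as the CI step above,
# i.e. the equivalent of `pkg"add MPI#1dc1740"`.
Pkg.add(PackageSpec(name = "MPI", rev = "1dc1740"))
Pkg.status("MPI")  # confirm the pinned revision is active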
5 changes: 5 additions & 0 deletions src/auxiliary/math.jl
@@ -284,6 +284,11 @@ end
 # when using `@fastmath`, which we also get from
 # [Fortran](https://godbolt.org/z/Yrsa1js7P)
 # or [C++](https://godbolt.org/z/674G7Pccv).
+#
+# Note however that such a custom reimplementation can cause incompatibilities with other
+# packages. Currently we are affected by an issue with MPI.jl on ARM, see
+# https://github.com/trixi-framework/Trixi.jl/issues/1922
+# The workaround is to resort to Base.min / Base.max when using MPI reductions.
 """
     Trixi.max(x, y, ...)
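To make the added comment concrete, here is a minimal standalone sketch of the reduction pattern it refers to; rank 0 and MPI.COMM_WORLD are choices made for the sketch, not part of the diff:

using MPI

MPI.Init()
comm = MPI.COMM_WORLD

local_res = rand()  # stand-in for a per-rank residual

# `Base.max` maps onto MPI's built-in maximum reduction. Passing a package-local
# reimplementation of `max` would force MPI.jl to register a custom operator at
# call time, which is what currently breaks on ARM (Trixi.jl issue #1922).
global_res = MPI.Reduce(local_res, Base.max, 0, comm)

if MPI.Comm_rank(comm) == 0
    println("global residual: ", global_res)
end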
7 changes: 7 additions & 0 deletions src/auxiliary/mpi.jl
@@ -128,3 +128,10 @@ parallel execution of Trixi.jl.
 See the "Miscellaneous" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/).
 """
 ode_unstable_check(dt, u, semi, t) = isnan(dt)
+
+# Custom MPI operators to work around
+# https://github.com/trixi-framework/Trixi.jl/issues/1922
+function reduce_vector_plus(x, y)
+    x .+ y
+end
+MPI.@Op(reduce_vector_plus, Any)
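A short standalone usage sketch of the operator registered above; it assumes the pinned MPI.jl revision that provides `MPI.@Op`, and the `SVector` payload is only an illustration of the vector-valued quantities reduced later in this PR:

using MPI
using StaticArrays: SVector

# Mirror the definitions from src/auxiliary/mpi.jl.
function reduce_vector_plus(x, y)
    x .+ y
end
MPI.@Op(reduce_vector_plus, Any)

MPI.Init()
comm = MPI.COMM_WORLD

# Each rank contributes a vector-valued partial result, e.g. a partial integral.
local_integral = SVector(1.0, 2.0, 3.0) * (MPI.Comm_rank(comm) + 1)

# Registering the operator ahead of time avoids building a closure-based custom
# MPI op at the call site, which is the part that fails on ARM.
global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, 0, comm)

MPI.Comm_rank(comm) == 0 && println("sum over ranks: ", global_integral[])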
3 changes: 2 additions & 1 deletion src/callbacks_step/analysis.jl
@@ -434,7 +434,8 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
         res = maximum(abs, view(du, v, ..))
         if mpi_isparallel()
             # TODO: Debugging, here is a type instability
-            global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())
+            # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+            global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
             if mpi_isroot()
                 res::eltype(du) = global_res[]
             end
9 changes: 6 additions & 3 deletions src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,7 +131,8 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
@@ -161,7 +162,8 @@ function integrate_via_indices(func::Func, u,
                                    normalize = normalize)
 
     # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
-    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(local_integral), reduce_vector_plus, mpi_root(),
+                                  mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(local_integral), global_integral[])
     else
@@ -194,7 +196,8 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(),
+                                  mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(integral), global_integral[])
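For context, a minimal standalone sketch of the in-place error reduction used in `calc_error_norms`; four solution variables, rank 0, and MPI.COMM_WORLD are assumptions of the sketch:

using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Per-rank error contributions for, say, four solution variables.
global_l2_error = rand(4) .^ 2
global_linf_error = rand(4)

# In-place, element-wise reductions onto the root rank: `+` accumulates the
# squared L2 contributions, `Base.max` takes the maximum for the Linf error.
MPI.Reduce!(global_l2_error, +, 0, comm)
MPI.Reduce!(global_linf_error, Base.max, 0, comm)

if MPI.Comm_rank(comm) == 0
    println("L2 (still squared, unnormalized): ", global_l2_error)
    println("Linf: ", global_linf_error)
end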
6 changes: 4 additions & 2 deletions src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,7 +49,8 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
@@ -87,7 +88,8 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    global_integral = MPI.Reduce!(Ref(integral), reduce_vector_plus, mpi_root(),
+                                  mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         integral = convert(typeof(integral), global_integral[])
18 changes: 12 additions & 6 deletions src/callbacks_step/stepsize_dg2d.jl
@@ -54,7 +54,8 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -70,7 +71,8 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -154,7 +156,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -170,7 +173,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -186,7 +190,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -202,7 +207,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
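For reference, a standalone sketch of the `Allreduce!` pattern repeated in the hunks above; the local time step is a placeholder value invented for the sketch:

using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Each rank computes its own locally admissible time step ...
dt_local = 0.1 / (MPI.Comm_rank(comm) + 1)

# ... and all ranks agree on the global minimum. `Base.min` is used instead of a
# package-local `min` so MPI.jl can map the reduction onto its built-in minimum
# operation (see the comment in src/auxiliary/math.jl).
dt = MPI.Allreduce!(Ref(dt_local), Base.min, comm)[]

println("rank ", MPI.Comm_rank(comm), ": global dt = ", dt)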
12 changes: 8 additions & 4 deletions src/callbacks_step/stepsize_dg3d.jl
@@ -130,7 +130,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -146,7 +147,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -162,7 +164,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -178,7 +181,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end