Adding HPL experiment class #527

Merged · 27 commits · Jan 29, 2025
27 changes: 27 additions & 0 deletions .github/workflows/run.yml
@@ -731,6 +731,33 @@ jobs:
    system_name: tioga
    system_spec: llnl-elcapitan rocm=5.5.1 compiler=cce +gtl

- name: hpl/mpi caliper=mpi,time tioga llnl-elcapitan rocm=5.5.1 compiler=cce +gtl blas=intel-oneapi-mkl
  uses: ./.github/actions/dynamic-dry-run
  with:
    benchmark_name: hpl
    benchmark_mode: mpi
    benchmark_spec: hpl caliper=mpi,time
    system_name: tioga
    system_spec: llnl-elcapitan rocm=5.5.1 compiler=cce +gtl blas=intel-oneapi-mkl

- name: hpl/mpi caliper=mpi,time ruby llnl-cluster cluster=ruby compiler=gcc
  uses: ./.github/actions/dynamic-dry-run
  with:
    benchmark_name: hpl
    benchmark_mode: mpi
    benchmark_spec: hpl caliper=mpi,time
    system_name: ruby
    system_spec: llnl-cluster cluster=ruby compiler=gcc

- name: hpl/mpi caliper=mpi,time lassen llnl-sierra cuda=11-8-0 compiler=clang-ibm blas=essl
  uses: ./.github/actions/dynamic-dry-run
  with:
    benchmark_name: hpl
    benchmark_mode: mpi
    benchmark_spec: hpl caliper=mpi,time
    system_name: lassen
    system_spec: llnl-sierra cuda=11-8-0 compiler=clang-ibm blas=essl

- name: stream/mpi caliper=mpi,time ruby llnl-cluster cluster=ruby
  uses: ./.github/actions/dynamic-dry-run
  with:
115 changes: 115 additions & 0 deletions experiments/hpl/experiment.py
@@ -0,0 +1,115 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

from benchpark.error import BenchparkError
from benchpark.directives import variant
from benchpark.experiment import Experiment
from benchpark.scaling import StrongScaling
from benchpark.scaling import WeakScaling
from benchpark.openmp import OpenMPExperiment
from benchpark.caliper import Caliper


class Hpl(
    Experiment,
    StrongScaling,
    WeakScaling,
    OpenMPExperiment,
    Caliper,
):

    variant(
        "workload",
        default="standard",
        description="workload to use",
    )

    variant(
        "version",
        default="2.3-caliper",
        description="app version",
    )

    def compute_applications_section(self):
        # TODO: Replace with conflicts clause
        scaling_modes = {
            "strong": self.spec.satisfies("+strong"),
            "single_node": self.spec.satisfies("+single_node"),
            "weak": self.spec.satisfies("+weak"),
        }

        scaling_mode_enabled = [key for key, value in scaling_modes.items() if value]
        if len(scaling_mode_enabled) != 1:
            print(scaling_mode_enabled)
            raise BenchparkError(
                f"Only one type of scaling per experiment is allowed for application package {self.name}"
            )

        # Number of initial nodes
        num_nodes = {"n_nodes": 1}
        problem_size = {"Ns": 10000}
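        # The variable names below mirror the standard HPL.dat input: Ns is the
        # matrix order, NBs the block size, Ps x Qs the MPI process grid, and
        # the N-* entries count how many values are supplied for each parameter.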

self.add_experiment_variable("N-Grids", 1, False)
self.add_experiment_variable("Ps", "4 * {n_nodes}", True)
self.add_experiment_variable("Qs", "8", False)

self.add_experiment_variable("N-Ns", 1, False)

self.add_experiment_variable("N-NBs", 1, False)
self.add_experiment_variable("NBs", 128, False)

self.add_experiment_variable(
"n_ranks", "{sys_cores_per_node} * {n_nodes}", False
)
self.add_experiment_variable("n_threads_per_proc", ["2"], True)

self.matrix_experiment_variables("n_threads_per_proc")

        if self.spec.satisfies("+single_node"):
            for pk, pv in num_nodes.items():
                self.add_experiment_variable(pk, pv, True)
            for pk, pv in problem_size.items():
                self.add_experiment_variable(pk, pv, True)

        elif self.spec.satisfies("+strong"):
            scaled_variables = self.generate_strong_scaling_params(
                {tuple(num_nodes.keys()): list(num_nodes.values())},
                int(self.spec.variants["scaling-factor"][0]),
                int(self.spec.variants["scaling-iterations"][0]),
            )
            for pk, pv in scaled_variables.items():
                self.add_experiment_variable(pk, pv, True)
            for pk, pv in problem_size.items():
                self.add_experiment_variable(pk, pv, True)
        elif self.spec.satisfies("+weak"):
            scaled_variables = self.generate_weak_scaling_params(
                {tuple(num_nodes.keys()): list(num_nodes.values())},
                {tuple(problem_size.keys()): list(problem_size.values())},
                int(self.spec.variants["scaling-factor"][0]),
                int(self.spec.variants["scaling-iterations"][0]),
            )
            num_resources = scaled_variables["n_nodes"]
            self.add_experiment_variable("n_nodes", num_resources, True)

            problem_size = scaled_variables["Ns"]
            self.add_experiment_variable("Ns", problem_size, True)

    def compute_spack_section(self):
        # get package version
        app_version = self.spec.variants["version"][0]

        # get system config options
        # TODO: Get compiler/mpi/package handles directly from system.py
        system_specs = {}
        system_specs["compiler"] = "default-compiler"
        system_specs["mpi"] = "default-mpi"
        system_specs["blas"] = "blas"

        # set package spack specs
        # empty package_specs value implies external package
        self.add_spack_spec(system_specs["mpi"])
        self.add_spack_spec(system_specs["blas"])

        self.add_spack_spec(self.name, [f"hpl@{app_version}", system_specs["compiler"]])
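
For orientation, here is a minimal standalone sketch (not part of the PR) of the node-count series the strong-scaling path is expected to request, assuming the helper multiplies the initial n_nodes by the scaling factor on each iteration; the real generate_strong_scaling_params and generate_weak_scaling_params implementations live in benchpark.scaling and may differ in detail.

# Hypothetical illustration only; benchpark's scaling helpers are the source of truth.
def strong_scaling_sketch(n_nodes=1, factor=2, iterations=4):
    # Keep the problem size fixed and grow resources: 1, 2, 4, 8 nodes.
    return {"n_nodes": [n_nodes * factor**i for i in range(iterations)]}


print(strong_scaling_sketch())  # -> {'n_nodes': [1, 2, 4, 8]}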