Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
db512c3
Add task list timing code
lroberts36 Oct 20, 2025
62cfccc
clean up
lroberts36 Oct 21, 2025
9cd59aa
remove unused
lroberts36 Oct 21, 2025
351c813
Merge branch 'develop' into lroberts36/task-list-timing
lroberts36 Oct 22, 2025
c43fb6a
change build boundary buffers logic to pre-allocate a more thoughtful…
jonahm-LANL Oct 13, 2025
a00f508
pull out unified comms
jonahm-LANL Oct 13, 2025
eb76052
switch to weak pointers
lroberts36 Oct 13, 2025
238ecec
only allocate buffers for sprase vars that are allocated
jonahm-LANL Oct 13, 2025
f33eb49
comm buffer reset cadence
jonahm-LANL Oct 16, 2025
621429c
reallocate -> reset
jonahm-LANL Oct 16, 2025
51f6ae5
why did the CI linter catch this but not make lint?
jonahm-LANL Oct 17, 2025
41fe15f
add missing defaulted constructor to bnd_id
jonahm-LANL Oct 17, 2025
11fc6fb
CHANGELOG
jonahm-LANL Oct 17, 2025
ea05725
CC
jonahm-LANL Oct 17, 2025
eb0b53a
param docs
jonahm-LANL Oct 17, 2025
556aae7
what is this instrumentation crud doing here??
jonahm-LANL Oct 17, 2025
b7a4a2a
also bnd-info
jonahm-LANL Oct 17, 2025
ca643c1
and semicolon
jonahm-LANL Oct 17, 2025
853a956
come on
jonahm-LANL Oct 17, 2025
d753418
apparently kokkos_defaulted_function doesnt work for destructors on HIP?
jonahm-LANL Oct 17, 2025
cf36e1d
Im grasping at straws here. wtf.
jonahm-LANL Oct 17, 2025
a15fd49
There we go now it works
jonahm-LANL Oct 17, 2025
e19c756
ok try this
jonahm-LANL Oct 17, 2025
1f9978e
OK IT WORKS
jonahm-LANL Oct 17, 2025
7ef9638
put things where they belong
jonahm-LANL Oct 17, 2025
f26633d
fix for index split for AMD
jonahm-LANL Oct 20, 2025
4d1b63a
Update doc/sphinx/src/boundary_communication.rst
Yurlungur Oct 20, 2025
56761bb
lroberts comments
jonahm-LANL Oct 20, 2025
4473cb9
parthenon enable gpu macro
jonahm-LANL Oct 20, 2025
64b6017
oops true -> false
jonahm-LANL Oct 21, 2025
0e3cd60
pgrete comments part 1
jonahm-LANL Oct 21, 2025
33484a7
Add control over whether to include/exclude an output on final signal…
pgrete Oct 24, 2025
8b3c76d
clean up annoying warning
jonahm-LANL Oct 29, 2025
3a6e0d8
use error handling macros
jonahm-LANL Oct 29, 2025
0633d0c
changelog
jonahm-LANL Oct 29, 2025
6c24a6c
a restart on the testing framework
lroberts36 Nov 10, 2025
bfb16c8
move to list
lroberts36 Nov 10, 2025
b0c26a1
working local sync
lroberts36 Nov 11, 2025
132e506
add some comments
lroberts36 Nov 11, 2025
ade0663
remove print statements
lroberts36 Nov 11, 2025
9ae1c4a
format and lint
lroberts36 Nov 11, 2025
c0e0ce4
Merge branch 'develop' into lroberts36/task-list-timing
lroberts36 Nov 11, 2025
273feae
add timing unit test
lroberts36 Nov 11, 2025
44cc4e5
Add json writing capability
lroberts36 Nov 11, 2025
b3f0a5a
Allow for globally turning off timing
lroberts36 Nov 12, 2025
4e62298
changelog
lroberts36 Nov 12, 2025
7917e4a
Merge branch 'develop' into lroberts36/task-list-timing
lroberts36 Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1337]](https://github.com/parthenon-hpc-lab/parthenon/pull/1337) Add task list based timing capabilities
- [[PR 1331]](https://github.com/parthenon-hpc-lab/parthenon/pull/1331) Add control over whether to include/exclude an output on final signal
- [[PR 1330]](https://github.com/parthenon-hpc-lab/parthenon/pull/1330) Add userspace mechanisms to control number of comm buffers allocated
- [[PR 1319]](https://github.com/parthenon-hpc-lab/parthenon/pull/1319) Add common scratch variable utilities
Expand Down
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ add_library(parthenon
solvers/solver_utils.hpp
solvers/tridiag_solver.hpp

tasks/task_timing.cpp
tasks/task_timing.hpp
tasks/tasks.cpp
tasks/tasks.hpp
tasks/thread_pool.hpp
Expand Down
109 changes: 109 additions & 0 deletions src/tasks/task_timing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//========================================================================================
// (C) (or copyright) 2023-2025. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
// in the program are reserved by Triad National Security, LLC, and the U.S. Department
// of Energy/National Nuclear Security Administration. The Government is granted for
// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
// license in this material to reproduce, prepare derivative works, distribute copies to
// the public, perform publicly and display publicly, and to permit others to do so.
//========================================================================================

#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "task_timing.hpp"
#include "tasks.hpp"

namespace parthenon {

// Attach this accumulator to `task`: the task is flagged for timing and receives a
// shared reference to this accumulator, to which Task::operator() reports each
// measured interval.
void TimingAccumulator::CollectTask(Task *task) {
  // Running count of the tasks handed to this accumulator.
  ++ntasks;
  task->time_task = true;
  // The task shares ownership of the accumulator, hence shared_from_this().
  task->timing_accumulators.emplace_back(shared_from_this());
}

// Attach this accumulator to `task` only while collection is switched on (see
// StartCollectingTasks / StopCollectingTasks); otherwise do nothing.
void TimingAccumulator::CollectTaskIfCollecting(Task *task) {
  if (!collecting) {
    return;
  }
  CollectTask(task);
}

// Sum, in seconds, of the (end - start) duration of every recorded timing chunk.
// The task status stored in each chunk does not affect the sum.
Real TimingAccumulator::GetTotalTime() const {
  Real accumulated_seconds = 0.0;
  for (const timing_chunk_t &chunk : timings) {
    accumulated_seconds += GetDurationInSeconds(std::get<0>(chunk), std::get<1>(chunk));
  }
  return accumulated_seconds;
}

// Return the accumulator stored under `label`, creating it first if the label is not
// yet known, and register it with the task list `tl` in either case.
std::shared_ptr<TimingAccumulator>
TimingAccumulatorDictionary::GetOrAddAndRegister(const std::string &label, TaskList &tl) {
  const bool is_new_label = (dict_.count(label) == 0);
  if (is_new_label) {
    dict_[label] = TimingAccumulator::create();
  }

  const std::shared_ptr<TimingAccumulator> &accumulator = dict_[label];
  tl.RegisterTimingAccumulator(accumulator);
  return accumulator;
}

namespace {

// Return `raw` with every character that may not appear verbatim inside a JSON string
// (double quote, backslash, and control characters below 0x20) replaced by its escape
// sequence, so that arbitrary timing labels still produce a valid JSON document.
std::string EscapeForJSON(const std::string &raw) {
  static constexpr char hex_digits[] = "0123456789abcdef";
  std::string escaped;
  escaped.reserve(raw.size());
  for (const char c : raw) {
    const auto uc = static_cast<unsigned char>(c);
    switch (c) {
    case '"':
      escaped += "\\\"";
      break;
    case '\\':
      escaped += "\\\\";
      break;
    case '\n':
      escaped += "\\n";
      break;
    case '\r':
      escaped += "\\r";
      break;
    case '\t':
      escaped += "\\t";
      break;
    default:
      if (uc < 0x20) {
        // Remaining control characters use the generic \u00XX form.
        escaped += "\\u00";
        escaped += hex_digits[uc >> 4];
        escaped += hex_digits[uc & 0xF];
      } else {
        escaped += c;
      }
    }
  }
  return escaped;
}

} // namespace

// Write all recorded timings to `filename` as a single JSON object of the form
//   {"<label>":[[start,end],[start,end],...],...}
// where start/end are seconds measured from the earliest start time found in any
// accumulator of this dictionary. Labels are emitted in the dictionary's (sorted)
// order. If the file cannot be opened or written, a warning is printed to stderr and
// the function returns without raising an error.
void TimingAccumulatorDictionary::WriteToJSON(const std::string &filename) {
  std::ofstream file(filename);
  if (!file.is_open()) {
    std::cerr << "TimingAccumulatorDictionary::WriteToJSON: could not open '" << filename
              << "' for writing; no timings were written.\n";
    return;
  }

  // First, find the minimum start time, which defines t = 0 in the output.
  TimingAccumulator::time_t min_time = std::chrono::steady_clock::now();
  for (const auto &[name, taccum] : dict_) {
    for (const auto &timing : taccum->GetTimings()) {
      min_time = std::min(min_time, std::get<0>(timing));
    }
  }

  // Fixed notation with high precision preserves the double values; the stream flags
  // are sticky, so setting them once covers every number written below.
  file << std::fixed << std::setprecision(15);

  file << "{";
  bool first_key = true;
  for (const auto &[name, taccum] : dict_) {
    if (!first_key) {
      file << ",";
    }
    first_key = false;

    file << "\"" << EscapeForJSON(name) << "\":[";

    bool first_pair = true;
    for (const auto &timing : taccum->GetTimings()) {
      if (!first_pair) {
        file << ",";
      }
      first_pair = false;

      // Each interval is written as the JSON array [start, end].
      const double start = taccum->GetDurationInSeconds(min_time, std::get<0>(timing));
      const double end = taccum->GetDurationInSeconds(min_time, std::get<1>(timing));
      file << "[" << start << "," << end << "]";
    }

    file << "]";
  }
  file << "}";

  file.close();
  if (file.fail()) {
    std::cerr << "TimingAccumulatorDictionary::WriteToJSON: error while writing '"
              << filename << "'; the file may be incomplete.\n";
  }
}

} // namespace parthenon
113 changes: 113 additions & 0 deletions src/tasks/task_timing.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//========================================================================================
// (C) (or copyright) 2023-2025. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
// in the program are reserved by Triad National Security, LLC, and the U.S. Department
// of Energy/National Nuclear Security Administration. The Government is granted for
// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
// license in this material to reproduce, prepare derivative works, distribute copies to
// the public, perform publicly and display publicly, and to permit others to do so.
//========================================================================================
#ifndef TASKS_TASK_TIMING_HPP_
#define TASKS_TASK_TIMING_HPP_

#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include <basic_types.hpp>
#include <parthenon_mpi.hpp>

#include "utils/error_checking.hpp"

namespace parthenon {

class Task;
// Accumulates wall-clock timing records for the tasks attached to it.
//
// A task is attached with CollectTask() (or CollectTaskIfCollecting(), which only
// attaches while collection is switched on). CollectTask() hands the task a shared
// reference obtained from shared_from_this(), so instances must be owned by a
// std::shared_ptr: always construct them through create().
class TimingAccumulator : public std::enable_shared_from_this<TimingAccumulator> {
 public:
  using time_t = std::chrono::time_point<std::chrono::steady_clock>;
  // One timed task invocation: (start time, end time, status returned by the task).
  using timing_chunk_t = std::tuple<time_t, time_t, TaskStatus>;

 private:
  bool collecting{false};              // toggled by Start/StopCollectingTasks
  std::vector<timing_chunk_t> timings; // every chunk passed to AddTiming
  int ntasks{0};                       // number of CollectTask calls

  // Construction tag for the public constructor below. The explicit default
  // constructor keeps the tag from being spelled as `{}` by outside code, which would
  // otherwise allow building an accumulator that is not owned by a shared_ptr.
  class private_t {
   public:
    explicit private_t() = default;
  };

 public:
  // Public only so that std::make_shared can call it; use create() instead.
  explicit TimingAccumulator(private_t) {}

  // Factory: returns a new, empty accumulator owned by a shared_ptr.
  static std::shared_ptr<TimingAccumulator> create() {
    return std::make_shared<TimingAccumulator>(private_t());
  }

  // Append one timing record.
  void AddTiming(const timing_chunk_t &timing) { timings.push_back(timing); }

  // Switch off / on the attachment performed by CollectTaskIfCollecting().
  void StopCollectingTasks() { collecting = false; }
  void StartCollectingTasks() { collecting = true; }

  void CollectTask(Task *task);
  void CollectTaskIfCollecting(Task *task);

  // Elapsed time from `start` to `end` in seconds, computed from the difference
  // expressed as a whole number of nanoseconds.
  double GetDurationInSeconds(time_t start, time_t end) const {
    const auto elapsed_ns =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
    return 1.e-9 * static_cast<double>(elapsed_ns);
  }

  // Sum of the durations of all recorded chunks, in seconds.
  Real GetTotalTime() const;

  // Number of tasks attached through CollectTask().
  int GetTotalTasks() const { return ntasks; }

  // Read-only view of every recorded chunk, in the order they were added.
  const std::vector<timing_chunk_t> &GetTimings() const { return timings; }
};

struct TimingAccumulatorGuard {
explicit TimingAccumulatorGuard(std::shared_ptr<TimingAccumulator> timing_accumulator)
: tidc(timing_accumulator) {
tidc->StartCollectingTasks();
}
~TimingAccumulatorGuard() { tidc->StopCollectingTasks(); }
std::shared_ptr<TimingAccumulator> tidc;
};

class TaskList;
class TimingAccumulatorDictionary {
std::map<std::string, std::shared_ptr<TimingAccumulator>> dict_;

public:
std::shared_ptr<TimingAccumulator> GetOrAddAndRegister(const std::string &label,
TaskList &tl);

std::shared_ptr<TimingAccumulator> Get(const std::string &label) {
PARTHENON_REQUIRE(dict_.count(label) > 0, "Asking for non-existent timing region.");
return dict_[label];
Comment on lines +98 to +99
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just do this?

Suggested change
PARTHENON_REQUIRE(dict_.count(label) > 0, "Asking for non-existent timing region.");
return dict_[label];
return dict_.at(label);

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like having the descriptive error message and I think this isn't performance critical.

}

// Remove every (label, accumulator) entry from the dictionary.
void clear() { dict_.clear(); }
// Iterators over the (label, accumulator) entries of the underlying std::map, so the
// dictionary can be used directly in range-based for loops.
auto begin() { return dict_.begin(); }
auto end() { return dict_.end(); }
auto begin() const { return dict_.begin(); }
auto end() const { return dict_.end(); }

// Write the timings recorded by every accumulator to `file_name` as a JSON object that
// maps each label to a list of [start, end] pairs, given in seconds measured from the
// earliest recorded start time.
void WriteToJSON(const std::string &file_name);
};

} // namespace parthenon

#endif // TASKS_TASK_TIMING_HPP_
12 changes: 12 additions & 0 deletions src/tasks/tasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,19 @@ TaskID TaskID::operator|(const TaskID &other) const {
}

TaskStatus Task::operator()() {
TimingAccumulator::time_t start;
if (time_task && enable_timing) {
Kokkos::fence();
start = std::chrono::steady_clock::now();
}
auto status = f();
if (time_task && enable_timing) {
Kokkos::fence();
TimingAccumulator::time_t end = std::chrono::steady_clock::now();
TimingAccumulator::timing_chunk_t timing_chunk = std::make_tuple(start, end, status);
for (auto &tc : timing_accumulators)
tc->AddTiming(timing_chunk);
}
if (verbose_level_ > 0)
printf("%s [status = %i, rank = %i]\n", label_.c_str(), static_cast<int>(status),
Globals::my_rank);
Expand Down
20 changes: 20 additions & 0 deletions src/tasks/tasks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
Expand All @@ -30,6 +33,7 @@
#include <parthenon_mpi.hpp>

#include "globals.hpp"
#include "task_timing.hpp"
#include "thread_pool.hpp"
#include "utils/concepts_lite.hpp"
#include "utils/error_checking.hpp"
Expand Down Expand Up @@ -90,6 +94,7 @@ class TaskID {
std::vector<Task *> dep;
};

class TimingAccumulator;
class Task {
public:
Task() = default;
Expand All @@ -112,6 +117,8 @@ class Task {
dependent[static_cast<int>(TaskStatus::incomplete)].push_back(this);
}

static inline bool enable_timing{false};

TaskStatus operator()();
TaskID GetID() { return this; }
std::string GetLabel() const { return label_; }
Expand All @@ -136,6 +143,9 @@ class Task {
}
void reset_iteration() { num_calls = 0; }

std::vector<std::shared_ptr<TimingAccumulator>> timing_accumulators;
bool time_task{false};

private:
std::function<TaskStatus()> f;
// store a list of tasks that might be available to
Expand Down Expand Up @@ -193,6 +203,12 @@ class TaskList {
last_task = tasks.back().get();
}

std::set<std::shared_ptr<TimingAccumulator>> timing_accumulators_;

void RegisterTimingAccumulator(std::shared_ptr<TimingAccumulator> timing_accumulator) {
timing_accumulators_.insert(timing_accumulator);
}

template <class... Args>
TaskID AddTask(TaskID dep, Args &&...args) {
return AddTask(TaskQualifier::normal, dep, std::forward<Args>(args)...);
Expand Down Expand Up @@ -239,6 +255,9 @@ class TaskList {
Task *my_task = tasks.back().get();
TaskID id(my_task);

for (auto &timing_accumulator : timing_accumulators_)
timing_accumulator->CollectTaskIfCollecting(my_task);

if (tq.LocalSync() || tq.GlobalSync() || tq.Once()) {
regional_tasks.push_back(my_task);
}
Expand Down Expand Up @@ -342,6 +361,7 @@ class TaskList {
std::pair<TaskList &, TaskID> AddSublist(TID &&dep, std::pair<int, int> minmax_iters) {
sublists.push_back(std::make_shared<TaskList>(dep, minmax_iters));
auto &tl = *sublists.back();
tl.timing_accumulators_ = this->timing_accumulators_;
tl.SetID(unique_id);
return std::make_pair(std::ref(tl), TaskID(tl.last_task));
}
Expand Down
Loading
Loading