std.Progress: fix many bugs #23725

Merged 1 commit on May 3, 2025
142 changes: 89 additions & 53 deletions lib/std/Progress.zig
@@ -39,10 +39,20 @@ draw_buffer: []u8,
/// CPU cache.
node_parents: []Node.Parent,
node_storage: []Node.Storage,
node_freelist: []Node.OptionalIndex,
node_freelist_first: Node.OptionalIndex,
node_freelist_next: []Node.OptionalIndex,
node_freelist: Freelist,
/// This is the number of elements in node arrays which have been used so far. Nodes before this
/// index are either active, or on the freelist. The remaining nodes are implicitly free. This
/// value may at times temporarily exceed the node count.
node_end_index: u32,

const Freelist = packed struct(u32) {
head: Node.OptionalIndex,
/// Whenever `node_freelist` is added to, this generation is incremented
/// to avoid ABA bugs when acquiring nodes. Wrapping arithmetic is used.
generation: u24,
};

pub const TerminalMode = union(enum) {
off,
ansi_escape_codes,
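
The `Freelist` introduced in the hunk above packs the list head together with a 24-bit generation counter, so a thread holding a stale snapshot of the head fails its compare-and-swap even if the same index has since been freed and reused: the classic tagged-head defense against ABA. The following is a minimal, self-contained sketch of that pattern, not the `std.Progress` implementation itself; the pool size and the names `TaggedFreelist`, `next_links`, `push`, `pop`, and `sentinel` are invented for illustration.

const std = @import("std");

/// Illustrative miniature of a generation-tagged, lock-free freelist of indices.
const TaggedFreelist = packed struct(u32) {
    /// Index into `next_links`, or `sentinel` when the list is empty.
    head: u8,
    /// Incremented (wrapping) on every push so that a stale 32-bit snapshot
    /// cannot win the CAS even when `head` happens to look the same.
    generation: u24,

    const sentinel: u8 = 0xff;
};

var list: TaggedFreelist = .{ .head = TaggedFreelist.sentinel, .generation = 0 };
var next_links: [16]u8 = undefined;

fn push(index: u8) void {
    var old = @atomicLoad(TaggedFreelist, &list, .monotonic);
    while (true) {
        // Publish the link before the CAS that makes `index` reachable.
        @atomicStore(u8, &next_links[index], old.head, .monotonic);
        old = @cmpxchgWeak(
            TaggedFreelist,
            &list,
            old,
            .{ .head = index, .generation = old.generation +% 1 },
            .release, // a later pop must observe the `next_links` store above
            .monotonic,
        ) orelse return;
    }
}

fn pop() ?u8 {
    var old = @atomicLoad(TaggedFreelist, &list, .acquire);
    while (old.head != TaggedFreelist.sentinel) {
        const new: TaggedFreelist = .{
            .head = @atomicLoad(u8, &next_links[old.head], .monotonic),
            // Leaving the generation unchanged here mirrors the diff above:
            // bumping it on push alone already tells apart "same head, new era".
            .generation = old.generation,
        };
        old = @cmpxchgWeak(TaggedFreelist, &list, old, new, .acquire, .acquire) orelse
            return old.head;
    }
    return null;
}

test "tagged freelist round trip" {
    push(3);
    push(7);
    try std.testing.expectEqual(@as(?u8, 7), pop());
    try std.testing.expectEqual(@as(?u8, 3), pop());
    try std.testing.expectEqual(@as(?u8, null), pop());
}
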
@@ -112,7 +122,7 @@ pub const Node = struct {
// causes `completed_count` to be treated as a file descriptor, so
// the order here matters.
@atomicStore(u32, &s.completed_count, integer, .monotonic);
@atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release);
@atomicStore(u32, &s.estimated_total_count, std.math.maxInt(u32), .release); // synchronizes with acquire in `serialize`
}

/// Not thread-safe.
@@ -184,12 +194,24 @@ pub const Node = struct {
const node_index = node.index.unwrap() orelse return Node.none;
const parent = node_index.toParent();

const freelist_head = &global_progress.node_freelist_first;
var opt_free_index = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
while (opt_free_index.unwrap()) |free_index| {
const freelist_ptr = freelistByIndex(free_index);
const next = @atomicLoad(Node.OptionalIndex, freelist_ptr, .seq_cst);
opt_free_index = @cmpxchgWeak(Node.OptionalIndex, freelist_head, opt_free_index, next, .seq_cst, .seq_cst) orelse {
const freelist = &global_progress.node_freelist;
var old_freelist = @atomicLoad(Freelist, freelist, .acquire); // acquire to ensure we have the correct "next" entry
while (old_freelist.head.unwrap()) |free_index| {
const next_ptr = freelistNextByIndex(free_index);
const new_freelist: Freelist = .{
.head = @atomicLoad(Node.OptionalIndex, next_ptr, .monotonic),
// We don't need to increment the generation when removing nodes from the free list,
// only when adding them. (This choice is arbitrary; the opposite would also work.)
.generation = old_freelist.generation,
};
old_freelist = @cmpxchgWeak(
Freelist,
freelist,
old_freelist,
new_freelist,
.acquire, // not theoretically necessary, but not allowed to be weaker than the failure order
.acquire, // ensure we have the correct `node_freelist_next` entry on the next iteration
) orelse {
// We won the allocation race.
return init(free_index, parent, name, estimated_total_items);
};
@@ -243,18 +265,28 @@ pub const Node = struct {
}
const index = n.index.unwrap() orelse return;
const parent_ptr = parentByIndex(index);
if (parent_ptr.unwrap()) |parent_index| {
if (@atomicLoad(Node.Parent, parent_ptr, .monotonic).unwrap()) |parent_index| {
_ = @atomicRmw(u32, &storageByIndex(parent_index).completed_count, .Add, 1, .monotonic);
@atomicStore(Node.Parent, parent_ptr, .unused, .seq_cst);
@atomicStore(Node.Parent, parent_ptr, .unused, .monotonic);

const freelist_head = &global_progress.node_freelist_first;
var first = @atomicLoad(Node.OptionalIndex, freelist_head, .seq_cst);
const freelist = &global_progress.node_freelist;
var old_freelist = @atomicLoad(Freelist, freelist, .monotonic);
while (true) {
@atomicStore(Node.OptionalIndex, freelistByIndex(index), first, .seq_cst);
first = @cmpxchgWeak(Node.OptionalIndex, freelist_head, first, index.toOptional(), .seq_cst, .seq_cst) orelse break;
@atomicStore(Node.OptionalIndex, freelistNextByIndex(index), old_freelist.head, .monotonic);
old_freelist = @cmpxchgWeak(
Freelist,
freelist,
old_freelist,
.{ .head = index.toOptional(), .generation = old_freelist.generation +% 1 },
.release, // ensure a matching `start` sees the freelist link written above
.monotonic, // our write above is irrelevant if we need to retry
) orelse {
// We won the race.
return;
};
}
} else {
@atomicStore(bool, &global_progress.done, true, .seq_cst);
@atomicStore(bool, &global_progress.done, true, .monotonic);
global_progress.redraw_event.set();
if (global_progress.update_thread) |thread| thread.join();
}
@@ -291,8 +323,8 @@ pub const Node = struct {
return &global_progress.node_parents[@intFromEnum(index)];
}

fn freelistByIndex(index: Node.Index) *Node.OptionalIndex {
return &global_progress.node_freelist[@intFromEnum(index)];
fn freelistNextByIndex(index: Node.Index) *Node.OptionalIndex {
return &global_progress.node_freelist_next[@intFromEnum(index)];
}

fn init(free_index: Index, parent: Parent, name: []const u8, estimated_total_items: usize) Node {
@@ -307,8 +339,10 @@ pub const Node = struct {
@atomicStore(u8, &storage.name[name_len], 0, .monotonic);

const parent_ptr = parentByIndex(free_index);
assert(parent_ptr.* == .unused);
@atomicStore(Node.Parent, parent_ptr, parent, .release);
if (std.debug.runtime_safety) {
assert(@atomicLoad(Node.Parent, parent_ptr, .monotonic) == .unused);
}
@atomicStore(Node.Parent, parent_ptr, parent, .monotonic);

return .{ .index = free_index.toOptional() };
}
@@ -329,15 +363,15 @@ var global_progress: Progress = .{

.node_parents = &node_parents_buffer,
.node_storage = &node_storage_buffer,
.node_freelist = &node_freelist_buffer,
.node_freelist_first = .none,
.node_freelist_next = &node_freelist_next_buffer,
.node_freelist = .{ .head = .none, .generation = 0 },
.node_end_index = 0,
};

const node_storage_buffer_len = 83;
var node_parents_buffer: [node_storage_buffer_len]Node.Parent = undefined;
var node_storage_buffer: [node_storage_buffer_len]Node.Storage = undefined;
var node_freelist_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;
var node_freelist_next_buffer: [node_storage_buffer_len]Node.OptionalIndex = undefined;

var default_draw_buffer: [4096]u8 = undefined;

@@ -456,7 +490,7 @@ fn updateThreadRun() void {

{
const resize_flag = wait(global_progress.initial_delay_ns);
if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
maybeUpdateSize(resize_flag);

const buffer, _ = computeRedraw(&serialized_buffer);
@@ -470,7 +504,7 @@ fn updateThreadRun() void {
while (true) {
const resize_flag = wait(global_progress.refresh_rate_ns);

if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
stderr_mutex.lock();
defer stderr_mutex.unlock();
return clearWrittenWithEscapeCodes() catch {};
@@ -500,7 +534,7 @@ fn windowsApiUpdateThreadRun() void {

{
const resize_flag = wait(global_progress.initial_delay_ns);
if (@atomicLoad(bool, &global_progress.done, .seq_cst)) return;
if (@atomicLoad(bool, &global_progress.done, .monotonic)) return;
maybeUpdateSize(resize_flag);

const buffer, const nl_n = computeRedraw(&serialized_buffer);
@@ -516,7 +550,7 @@ fn windowsApiUpdateThreadRun() void {
while (true) {
const resize_flag = wait(global_progress.refresh_rate_ns);

if (@atomicLoad(bool, &global_progress.done, .seq_cst)) {
if (@atomicLoad(bool, &global_progress.done, .monotonic)) {
stderr_mutex.lock();
defer stderr_mutex.unlock();
return clearWrittenWindowsApi() catch {};
@@ -558,7 +592,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
{
_ = wait(global_progress.initial_delay_ns);

if (@atomicLoad(bool, &global_progress.done, .seq_cst))
if (@atomicLoad(bool, &global_progress.done, .monotonic))
return;

const serialized = serialize(&serialized_buffer);
@@ -570,7 +604,7 @@ fn ipcThreadRun(fd: posix.fd_t) anyerror!void {
while (true) {
_ = wait(global_progress.refresh_rate_ns);

if (@atomicLoad(bool, &global_progress.done, .seq_cst))
if (@atomicLoad(bool, &global_progress.done, .monotonic))
return;

const serialized = serialize(&serialized_buffer);
@@ -765,37 +799,39 @@ fn serialize(serialized_buffer: *Serialized.Buffer) Serialized {
var any_ipc = false;

// Iterate all of the nodes and construct a serializable copy of the state that can be examined
// without atomics.
const end_index = @atomicLoad(u32, &global_progress.node_end_index, .monotonic);
// without atomics. The `@min` call is here because `node_end_index` might briefly exceed the
// node count sometimes.
const end_index = @min(@atomicLoad(u32, &global_progress.node_end_index, .monotonic), global_progress.node_storage.len);
for (
global_progress.node_parents[0..end_index],
global_progress.node_storage[0..end_index],
serialized_buffer.map[0..end_index],
) |*parent_ptr, *storage_ptr, *map| {
var begin_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
while (begin_parent != .unused) {
const dest_storage = &serialized_buffer.storage[serialized_len];
copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire);
dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);
const end_parent = @atomicLoad(Node.Parent, parent_ptr, .acquire);
if (begin_parent == end_parent) {
any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
serialized_buffer.parents[serialized_len] = begin_parent;
map.* = @enumFromInt(serialized_len);
serialized_len += 1;
break;
}

begin_parent = end_parent;
} else {
// A node may be freed during the execution of this loop, causing
// there to be a parent reference to a nonexistent node. Without
// this assignment, this would lead to the map entry containing
// stale data. By assigning none, the child node with the bad
// parent pointer will be harmlessly omitted from the tree.
const parent = @atomicLoad(Node.Parent, parent_ptr, .monotonic);
if (parent == .unused) {
// We might read "mixed" node data in this loop, due to weird atomic things
// or just a node actually being freed while this loop runs. That could cause
// there to be a parent reference to a nonexistent node. Without this assignment,
// this would lead to the map entry containing stale data. By assigning none, the
// child node with the bad parent pointer will be harmlessly omitted from the tree.
//
// Note that there's no concern of potentially creating "looping" data if we read
// "mixed" node data like this, because if a node is (directly or indirectly) its own
// parent, it will just not be printed at all. The general idea here is that performance
// is more important than 100% correct output every frame, given that this API is likely
// to be used in hot paths!
map.* = .none;
continue;
}
const dest_storage = &serialized_buffer.storage[serialized_len];
copyAtomicLoad(&dest_storage.name, &storage_ptr.name);
dest_storage.estimated_total_count = @atomicLoad(u32, &storage_ptr.estimated_total_count, .acquire); // synchronizes with release in `setIpcFd`
dest_storage.completed_count = @atomicLoad(u32, &storage_ptr.completed_count, .monotonic);

any_ipc = any_ipc or (dest_storage.getIpcFd() != null);
serialized_buffer.parents[serialized_len] = parent;
map.* = @enumFromInt(serialized_len);
serialized_len += 1;
}

// Remap parents to point inside serialized arrays.
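
For context on the `@min` clamp near the top of this hunk: the doc comment on `node_end_index` earlier in the diff notes that the value "may at times temporarily exceed the node count". That is the usual signature of a reserve-by-increment scheme in which a writer bumps the counter first and backs the reservation out if it landed past capacity; the reservation path itself is outside this diff. Below is a minimal sketch of that pattern and of the reader-side clamp, with hypothetical names (`slots`, `reserve`, `snapshotLen`) rather than the actual `std.Progress` code.

const std = @import("std");

var slots: [8]u32 = undefined;
var end_index: u32 = 0;

/// Writer side: reserve a slot by incrementing first and backing out on
/// overflow. Between the Add and the Sub, a reader can observe a counter
/// value larger than `slots.len`.
fn reserve() ?u32 {
    const i = @atomicRmw(u32, &end_index, .Add, 1, .monotonic);
    if (i >= slots.len) {
        _ = @atomicRmw(u32, &end_index, .Sub, 1, .monotonic);
        return null;
    }
    return i;
}

/// Reader side: clamp before slicing, like the `@min` in `serialize` above.
fn snapshotLen() usize {
    return @min(@atomicLoad(u32, &end_index, .monotonic), slots.len);
}

test "transient overshoot is clamped away" {
    var reserved: usize = 0;
    while (reserve()) |_| reserved += 1;
    try std.testing.expectEqual(slots.len, reserved);
    try std.testing.expect(snapshotLen() <= slots.len);
}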