From 9e9c12dd98ba184dcbfc3390173921fcac333c9c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 22:21:55 -0400 Subject: [PATCH 1/7] Add options to filter out internal GC and dispatch frames The GC frames are skipped by default now, since they make the GC graphs much less readable, and everyone basically always wants them skipped. I also refactored things a bit in the process, to allow for only checking these once per frame, and caching the result. --- src/Allocs.jl | 49 +++++++++++++++++++++++------ src/PProf.jl | 84 +++++++++++++++++++++++++++++++++++++++++++------- test/Allocs.jl | 24 +++++++++++++++ test/PProf.jl | 13 ++++++-- 4 files changed, 147 insertions(+), 23 deletions(-) diff --git a/src/Allocs.jl b/src/Allocs.jl index 78f2aa6..3e1306c 100644 --- a/src/Allocs.jl +++ b/src/Allocs.jl @@ -28,6 +28,10 @@ The `kwargs` are the same as [`PProf.pprof`](@ref), except: of every allocation. Note that this tends to make the Graph view harder to read, because it's over-aggregated, so we recommend filtering out the `Type:` nodes in the PProf web UI. +- `skip_gc_internal_frames::Bool = true`: By default, we hide the tail-end of the + stacktrace because it's not very useful. *Every* frame ends with some `jl_gc_*_alloc`, + which calls `maybe_record_alloc_to_profile`, which is distracting in Profile viewer. + This is similar to `skip_julia_dispatch_frames` in the default CPU pprof() function. """ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch() ; @@ -40,7 +44,9 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, full_signatures::Bool = true, + skip_julia_dispatch_frames::Bool = false, # Allocs-specific arguments: + skip_gc_internal_frames::Bool = true, frame_for_type::Bool = true, ) period = UInt64(0x1) @@ -63,6 +69,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch # specializations to PProf Functions. We use a hash of the function for the key. funcs_map = Dict{UInt64, UInt64}() functions = Vector{Function}() + funcs_to_skip = Set{Any}() # NOTE: It's a bug to use the actual StackFrame itself as a key in a dictionary, since # different StackFrames can compare the same sometimes! 🙀 So we manually compute an ID @@ -80,23 +87,26 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch drop_frames = isnothing(drop_frames) ? 0 : enter!(drop_frames) keep_frames = isnothing(keep_frames) ? 0 : enter!(keep_frames) - function maybe_add_location(frame::StackFrame)::UInt64 + function maybe_add_location(frame::StackFrame)::Tuple{Bool,UInt64} + # Use a unique function id for the frame: # See: https://github.com/JuliaPerf/PProf.jl/issues/69 function_key = method_instance_id(frame) line_number = frame.line + + function_key in funcs_to_skip && return false, 0 + # For allocations profile, we uniquely identify each frame position by # (function,line_number) pairs. That identifies a position in the code. frame_key = (function_key, line_number) - return get!(locs_map, frame_key) do + + success = Ref(true) + loc = get!(locs_map, frame_key) do loc_id = UInt64(length(locations) + 1) # Extract info from the location frame (function_name, file_name) = string(frame.func), string(frame.file) - # Use a unique function id for the frame: - function_key = method_instance_id(frame) - # Decode the IP into information about this stack frame function_id = get!(funcs_map, function_key) do func_id = UInt64(length(functions) + 1) @@ -121,6 +131,14 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch end isempty(simple_name) && (simple_name = "[unknown function]") isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") + + if !PProf.should_keep_frame(from_c, skip_julia_dispatch_frames, + skip_gc_internal_frames, frame, simple_name) + success[] = false + push!(funcs_to_skip, function_key) + return 0 + end + # WEIRD TRICK: By entering a separate copy of the string (with a # different string id) for the name and system_name, pprof will use # the supplied `name` *verbatim*, without pruning off the arguments. @@ -131,6 +149,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch else name = enter!(simple_name) end + file = Base.find_source_file(file_name) file = file !== nothing ? file : file_name filename = enter!(file) @@ -139,6 +158,10 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch return func_id end + if !success[] + return 0 + end + push!( locations, Location(;id = loc_id, line=[Line(function_id, line_number)]) @@ -146,6 +169,8 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch return loc_id end + + return success[], loc end type_name_cache = Dict{Any,String}() @@ -158,17 +183,21 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch function construct_location_for_type(typename) # TODO: Lol something less hacky than this: - return maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0)) + _, loc = maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0)) + return loc end # convert the sample.stack to vector of location_ids @showprogress "Analyzing $(length(alloc_profile.allocs)) allocation samples..." for sample in alloc_profile.allocs # for each location in the sample.stack, if it's the first time seeing it, # we also enter that location into the locations table - location_ids = UInt64[ - maybe_add_location(frame) - for frame in sample.stacktrace if (!frame.from_c || from_c) - ] + location_ids = sizehint!(UInt64[], length(sample.stacktrace)) + for frame in sample.stacktrace + should_add, loc = maybe_add_location(frame) + if should_add + push!(location_ids, loc) + end + end if frame_for_type # Add location_id for the type: diff --git a/src/PProf.jl b/src/PProf.jl index e17fc6f..b1996f2 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -81,6 +81,16 @@ You can also use `PProf.refresh(file="...")` to open a new file in the server. - `keep_frames`: frames with function_name fully matching regexp string will be kept, even if it matches drop_functions. - `ui_relative_percentages`: Passes `-relative_percentages` to pprof. Causes nodes ignored/hidden through the web UI to be ignored from totals when computing percentages. +- `skip_julia_dispatch_frames::Bool = false`: If true, we hide the internal frames from + julia dynamic dispatch: `jl_apply_generic`, `jl_apply`, `jl_invoke`, etc. This helps a lot + in the Graph view, because otherwise they imply a false recursion in your program when + there are multiple dynamic dispatches in your stacktrace. However, the dispatches can be + very useful in the flamegraph view, since they often represent performance problems. So + consider switching this based on which pprof UI view you're using. +- `skip_gc_internal_frames::Bool = false`: Provided for consistency with PProf.Alloc.pprof(). + True by default, since this would only be present when running an Alloc profile at the + same time, and you may want to know the impact of that on your CPU profile. See + [`PProf.Alloc.pprof`](@ref). """ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, lidict::Union{Nothing, Dict} = nothing; @@ -94,6 +104,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, drop_frames::Union{Nothing, AbstractString} = nothing, keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, + skip_julia_dispatch_frames::Bool = false, + skip_gc_internal_frames::Bool = false, ) if data === nothing data = if isdefined(Profile, :has_meta) @@ -127,10 +139,11 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Functions need a uid, we'll use the pointer for the method instance seen_funcs = Set{UInt64}() funcs = Dict{UInt64, Function}() + funcs_to_skip = Set{UInt64}() seen_locs = Set{UInt64}() locs = Dict{UInt64, Location}() - locs_from_c = Dict{UInt64, Bool}() + locs_to_skip = Set{UInt64}() samples = Vector{Sample}() sample_type = [ @@ -173,7 +186,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # if we have already seen this IP avoid decoding it again if ip in seen_locs # Only keep C frames if from_c=true - if (from_c || !locs_from_c[ip]) + if !(ip in locs_to_skip) push!(location_id, ip) end continue @@ -183,6 +196,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Decode the IP into information about this stack frame (or frames given inlining) location = Location(;id = ip, address = ip) location_from_c = true + should_keep_location = false # Will have multiple frames if frames were inlined (the last frame is the "real # function", the inlinee) for frame in lookup[ip] @@ -194,10 +208,13 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Use a unique function id for the frame: func_id = method_instance_id(frame) - push!(location.line, Line(function_id = func_id, line = frame.line)) # Known function - func_id in seen_funcs && continue + if func_id in seen_funcs && !(func_id in funcs_to_skip) + should_keep_location = true + push!(location.line, Line(function_id = func_id, line = frame.line)) + continue + end push!(seen_funcs, func_id) # Store the function in our functions dict @@ -220,6 +237,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, end isempty(simple_name) && (simple_name = "[unknown function]") isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") + + # Only keep C functions if from_c=true, and it's not a skipped internal frame. + if !should_keep_frame(from_c, skip_julia_dispatch_frames, + skip_gc_internal_frames, frame, simple_name) + push!(funcs_to_skip, func_id) + continue + end + should_keep_location = true + # WEIRD TRICK: By entering a separate copy of the string (with a # different string id) for the name and system_name, pprof will use # the supplied `name` *verbatim*, without pruning off the arguments. @@ -232,16 +258,17 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, end file = Base.find_source_file(file) filename = enter!(file) - # Only keep C functions if from_c=true - if (from_c || !frame.from_c) - funcs[func_id] = Function(func_id, name, system_name, filename, start_line) - end + + # Save the new function to the Location + funcs[func_id] = Function(func_id, name, system_name, filename, start_line) + push!(location.line, Line(function_id = func_id, line = frame.line)) end - locs_from_c[ip] = location_from_c - # Only keep C frames if from_c=true - if (from_c || !location_from_c) + # If any of the frames were kept, save the location + if should_keep_location locs[ip] = location push!(location_id, ip) + else + push!(locs_to_skip, ip) end end @@ -291,6 +318,41 @@ function method_instance_id(frame) end end +function should_keep_frame(from_c, no_jl, no_gc, frame, name) + if ((from_c || !frame.from_c) + && !(no_jl && is_julia_dispatch_internal(name)) + && !(no_gc && is_gc_internal(name))) + return true + end + return false +end +function is_julia_dispatch_internal(name) + return name in ( + "jl_apply", + "_jl_apply", + "jl_invoke", + "_jl_invoke", + "jl_apply_generic", + "_jl_apply_generic", + "ijl_apply_generic", + ) +end +function is_gc_internal(name) + return name in ( + "jl_gc_alloc", + "jl_gc_alloc_", + "jl_gc_pool_alloc", + "jl_gc_big_alloc", + "jl_gc_alloc_typed", + "ijl_gc_pool_alloc", + "ijl_gc_big_alloc", + "ijl_gc_alloc_typed", + "ijl_gc_pool_alloc_instrumented", + "ijl_gc_big_alloc_instrumented", + "maybe_record_alloc_to_profile", + ) +end + """ refresh(; webhost = "localhost", webport = 57599, file = "profile.pb.gz", ui_relative_percentages = true) diff --git a/test/Allocs.jl b/test/Allocs.jl index 0a9e683..d1a27d4 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -63,4 +63,28 @@ end @test float_loc !== nothing end +function load_prof_proto(file) + @show file + open(io->decode(ProtoDecoder(io), PProf.perftools.profiles.Profile), file, "r") +end + +@testset "skip_julia_dispatch_frames" begin + Profile.Allocs.clear(); + Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_julia_dispatch_frames=false))) + @test !matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_julia_dispatch_frames=true))) +end +@testset "skip_gc_internal_frames" begin + Profile.Allocs.clear(); + Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal_frames=false))) + @test !matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal_frames=true))) +end + + + end # module PProfAllocsTest diff --git a/test/PProf.jl b/test/PProf.jl index 180a72d..97aa62b 100644 --- a/test/PProf.jl +++ b/test/PProf.jl @@ -28,8 +28,8 @@ end @profile for i in 1:10000 # Profile compilation - @eval foo(x,y) = x * y + x / y - @eval foo($i,3) + @eval $(Symbol("foo$i"))(x,y) = x * y + x / y + @eval $(Symbol("foo$i"))($i,3) end # Cache profile output to test that it isn't changed @@ -98,6 +98,15 @@ end @test load_prof_proto(pprof(out=tempname(), web=false, keep_frames = "foo")).keep_frames != 0 end +@testset "skip_julia_dispatch_frames" begin + Profile.clear() + @profile Base.inferencebarrier(foo)(1000000, 5, []) + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_julia_dispatch_frames=false))) + @test !matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_julia_dispatch_frames=true))) +end + @testset "@pprof macro" begin @pprof foo(10000, 5, []) From 5b34e6fa1e63bb2147d4d93f49260fa168dd9967 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 22:36:21 -0400 Subject: [PATCH 2/7] Simpler kwarg names --- src/Allocs.jl | 12 ++++++------ src/PProf.jl | 12 ++++++------ test/Allocs.jl | 12 ++++++------ test/PProf.jl | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Allocs.jl b/src/Allocs.jl index 3e1306c..b93f53b 100644 --- a/src/Allocs.jl +++ b/src/Allocs.jl @@ -28,10 +28,10 @@ The `kwargs` are the same as [`PProf.pprof`](@ref), except: of every allocation. Note that this tends to make the Graph view harder to read, because it's over-aggregated, so we recommend filtering out the `Type:` nodes in the PProf web UI. -- `skip_gc_internal_frames::Bool = true`: By default, we hide the tail-end of the +- `skip_gc_internal::Bool = true`: By default, we hide the tail-end of the stacktrace because it's not very useful. *Every* frame ends with some `jl_gc_*_alloc`, which calls `maybe_record_alloc_to_profile`, which is distracting in Profile viewer. - This is similar to `skip_julia_dispatch_frames` in the default CPU pprof() function. + This is similar to `skip_jl_dispatch` in the default CPU pprof() function. """ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch() ; @@ -44,9 +44,9 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, full_signatures::Bool = true, - skip_julia_dispatch_frames::Bool = false, + skip_jl_dispatch::Bool = false, # Allocs-specific arguments: - skip_gc_internal_frames::Bool = true, + skip_gc_internal::Bool = true, frame_for_type::Bool = true, ) period = UInt64(0x1) @@ -132,8 +132,8 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch isempty(simple_name) && (simple_name = "[unknown function]") isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") - if !PProf.should_keep_frame(from_c, skip_julia_dispatch_frames, - skip_gc_internal_frames, frame, simple_name) + if !PProf.should_keep_frame(from_c, skip_jl_dispatch, + skip_gc_internal, frame, simple_name) success[] = false push!(funcs_to_skip, function_key) return 0 diff --git a/src/PProf.jl b/src/PProf.jl index b1996f2..5dac743 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -81,13 +81,13 @@ You can also use `PProf.refresh(file="...")` to open a new file in the server. - `keep_frames`: frames with function_name fully matching regexp string will be kept, even if it matches drop_functions. - `ui_relative_percentages`: Passes `-relative_percentages` to pprof. Causes nodes ignored/hidden through the web UI to be ignored from totals when computing percentages. -- `skip_julia_dispatch_frames::Bool = false`: If true, we hide the internal frames from +- `skip_jl_dispatch::Bool = false`: If true, we hide the internal frames from julia dynamic dispatch: `jl_apply_generic`, `jl_apply`, `jl_invoke`, etc. This helps a lot in the Graph view, because otherwise they imply a false recursion in your program when there are multiple dynamic dispatches in your stacktrace. However, the dispatches can be very useful in the flamegraph view, since they often represent performance problems. So consider switching this based on which pprof UI view you're using. -- `skip_gc_internal_frames::Bool = false`: Provided for consistency with PProf.Alloc.pprof(). +- `skip_gc_internal::Bool = false`: Provided for consistency with PProf.Alloc.pprof(). True by default, since this would only be present when running an Alloc profile at the same time, and you may want to know the impact of that on your CPU profile. See [`PProf.Alloc.pprof`](@ref). @@ -104,8 +104,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, drop_frames::Union{Nothing, AbstractString} = nothing, keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, - skip_julia_dispatch_frames::Bool = false, - skip_gc_internal_frames::Bool = false, + skip_jl_dispatch::Bool = false, + skip_gc_internal::Bool = false, ) if data === nothing data = if isdefined(Profile, :has_meta) @@ -239,8 +239,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") # Only keep C functions if from_c=true, and it's not a skipped internal frame. - if !should_keep_frame(from_c, skip_julia_dispatch_frames, - skip_gc_internal_frames, frame, simple_name) + if !should_keep_frame(from_c, skip_jl_dispatch, + skip_gc_internal, frame, simple_name) push!(funcs_to_skip, func_id) continue end diff --git a/test/Allocs.jl b/test/Allocs.jl index d1a27d4..25b2748 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -68,21 +68,21 @@ function load_prof_proto(file) open(io->decode(ProtoDecoder(io), PProf.perftools.profiles.Profile), file, "r") end -@testset "skip_julia_dispatch_frames" begin +@testset "skip_jl_dispatch" begin Profile.Allocs.clear(); Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_julia_dispatch_frames=false))) - @test !matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_julia_dispatch_frames=true))) + @test matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=false))) + @test !matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=true))) end -@testset "skip_gc_internal_frames" begin +@testset "skip_gc_internal" begin Profile.Allocs.clear(); Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal_frames=false))) - @test !matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal_frames=true))) + @test matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) + @test !matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) end diff --git a/test/PProf.jl b/test/PProf.jl index 97aa62b..8fefeed 100644 --- a/test/PProf.jl +++ b/test/PProf.jl @@ -98,13 +98,13 @@ end @test load_prof_proto(pprof(out=tempname(), web=false, keep_frames = "foo")).keep_frames != 0 end -@testset "skip_julia_dispatch_frames" begin +@testset "skip_jl_dispatch" begin Profile.clear() @profile Base.inferencebarrier(foo)(1000000, 5, []) args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_julia_dispatch_frames=false))) - @test !matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_julia_dispatch_frames=true))) + @test matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=false))) + @test !matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=true))) end @testset "@pprof macro" begin From e37ef71986181da6ae8e1694b3d2879e82281b61 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 22:42:14 -0400 Subject: [PATCH 3/7] fix test warning on 1.11 --- test/Allocs.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Allocs.jl b/test/Allocs.jl index 25b2748..a7148b4 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -12,8 +12,8 @@ const out = tempname() Profile.Allocs.clear() Profile.Allocs.@profile sample_rate=1.0 begin # Profile compilation - @eval foo(x,y) = x * y + y / x - @eval foo(2, 3) + @eval $(Symbol("foo_alloc$i"))(x,y) = x * y + x / y + @eval $(Symbol("foo_alloc$i"))($i,3) end # Write the profile From e161b11ba34eb8c80467d44216d9c4db712134d6 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 22:50:15 -0400 Subject: [PATCH 4/7] Add asserts... Not sure why test failed --- test/Allocs.jl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/test/Allocs.jl b/test/Allocs.jl index a7148b4..44cc38c 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -12,8 +12,8 @@ const out = tempname() Profile.Allocs.clear() Profile.Allocs.@profile sample_rate=1.0 begin # Profile compilation - @eval $(Symbol("foo_alloc$i"))(x,y) = x * y + x / y - @eval $(Symbol("foo_alloc$i"))($i,3) + @eval foo_alloc(x,y) = x * y + x / y + @eval foo_alloc(1,3) end # Write the profile @@ -71,18 +71,24 @@ end @testset "skip_jl_dispatch" begin Profile.Allocs.clear(); Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + + @assert length(Profile.Allocs.fetch().allocs) > 0 + args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=false))) - @test !matches(r"jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=true))) + @test matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=false))) + @test !matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=true))) end @testset "skip_gc_internal" begin Profile.Allocs.clear(); Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + + @assert length(Profile.Allocs.fetch().allocs) > 0 + args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) - @test !matches(r"maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) + @test matches("maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) + @test !matches("maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) end From 2513042366479833905710c56506abb74b629c38 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 23:04:44 -0400 Subject: [PATCH 5/7] I also added this message to tell you about the new flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` julia> PProf.Allocs.pprof() ┌ Info: 👋 Welcome to PProf! │ │ Consider trying `skip_jl_dispatch = true` as well, for a cleaner Graph view. │ The current options are best for the Flamegraph view. │ └ Now outputting your profile... Main binary filename not available. Serving web UI on http://localhost:62261 "alloc-profile.pb.gz" ``` --- src/Allocs.jl | 1 + src/PProf.jl | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/Allocs.jl b/src/Allocs.jl index b93f53b..8b0add7 100644 --- a/src/Allocs.jl +++ b/src/Allocs.jl @@ -49,6 +49,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch skip_gc_internal::Bool = true, frame_for_type::Bool = true, ) + PProf.log_greeting(skip_jl_dispatch) period = UInt64(0x1) @assert !isempty(basename(out)) "`out=` must specify a file path to write to. Got unexpected: '$out'" diff --git a/src/PProf.jl b/src/PProf.jl index 5dac743..9a54750 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -107,6 +107,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, skip_jl_dispatch::Bool = false, skip_gc_internal::Bool = false, ) + log_greeting(skip_jl_dispatch) if data === nothing data = if isdefined(Profile, :has_meta) copy(Profile.fetch(include_meta = false)) @@ -299,6 +300,22 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, out end +log_once::Bool = false +function log_greeting(skip_jl_dispatch) + if !log_once + global log_once = true + if !skip_jl_dispatch + @info """👋 Welcome to PProf! + + Consider trying `skip_jl_dispatch = true` as well, for a cleaner Graph view. + The current options are best for the Flamegraph view. + + Now outputting your profile... + """ + end + end +end + function _escape_name_for_pprof(name) # HACK: Apparently proto doesn't escape func names with `"` in them ... >.< # TODO: Remove this hack after https://github.com/google/pprof/pull/564 @@ -384,6 +401,8 @@ function refresh(; webhost::AbstractString = "localhost", proc[] = pprof_jll.pprof() do pprof_path open(pipeline(`$pprof_path -http=$webhost:$webport $relative_percentages_flag $file`)) end + + sleep(1) # Wait for the server to start. This is usually long enough. end """ From c0bc397ef0fdb68a59b164fa7de04f989e98483c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sat, 29 Jul 2023 23:14:07 -0400 Subject: [PATCH 6/7] fixup for older versions --- src/PProf.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PProf.jl b/src/PProf.jl index 9a54750..aa5734f 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -300,9 +300,9 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, out end -log_once::Bool = false +log_once = false function log_greeting(skip_jl_dispatch) - if !log_once + if !(log_once::Bool) global log_once = true if !skip_jl_dispatch @info """👋 Welcome to PProf! From c53b14b8c328a64c7b6eee8c7c3e76a2b5356707 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 30 Jul 2023 00:41:55 -0400 Subject: [PATCH 7/7] Make test more robust; alloc frame should always be there --- test/Allocs.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Allocs.jl b/test/Allocs.jl index 44cc38c..4692d14 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -87,8 +87,8 @@ end args = (; out=tempname(), web=false) matches(r, proto) = any(s->occursin(r, s), proto.string_table) - @test matches("maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) - @test !matches("maybe_record_alloc_to_profile", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) + @test matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) + @test !matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) end