diff --git a/src/Allocs.jl b/src/Allocs.jl index 78f2aa6..8b0add7 100644 --- a/src/Allocs.jl +++ b/src/Allocs.jl @@ -28,6 +28,10 @@ The `kwargs` are the same as [`PProf.pprof`](@ref), except: of every allocation. Note that this tends to make the Graph view harder to read, because it's over-aggregated, so we recommend filtering out the `Type:` nodes in the PProf web UI. +- `skip_gc_internal::Bool = true`: By default, we hide the tail-end of the + stacktrace because it's not very useful. *Every* frame ends with some `jl_gc_*_alloc`, + which calls `maybe_record_alloc_to_profile`, which is distracting in Profile viewer. + This is similar to `skip_jl_dispatch` in the default CPU pprof() function. """ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch() ; @@ -40,9 +44,12 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, full_signatures::Bool = true, + skip_jl_dispatch::Bool = false, # Allocs-specific arguments: + skip_gc_internal::Bool = true, frame_for_type::Bool = true, ) + PProf.log_greeting(skip_jl_dispatch) period = UInt64(0x1) @assert !isempty(basename(out)) "`out=` must specify a file path to write to. Got unexpected: '$out'" @@ -63,6 +70,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch # specializations to PProf Functions. We use a hash of the function for the key. funcs_map = Dict{UInt64, UInt64}() functions = Vector{Function}() + funcs_to_skip = Set{Any}() # NOTE: It's a bug to use the actual StackFrame itself as a key in a dictionary, since # different StackFrames can compare the same sometimes! 🙀 So we manually compute an ID @@ -80,23 +88,26 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch drop_frames = isnothing(drop_frames) ? 0 : enter!(drop_frames) keep_frames = isnothing(keep_frames) ? 0 : enter!(keep_frames) - function maybe_add_location(frame::StackFrame)::UInt64 + function maybe_add_location(frame::StackFrame)::Tuple{Bool,UInt64} + # Use a unique function id for the frame: # See: https://github.com/JuliaPerf/PProf.jl/issues/69 function_key = method_instance_id(frame) line_number = frame.line + + function_key in funcs_to_skip && return false, 0 + # For allocations profile, we uniquely identify each frame position by # (function,line_number) pairs. That identifies a position in the code. frame_key = (function_key, line_number) - return get!(locs_map, frame_key) do + + success = Ref(true) + loc = get!(locs_map, frame_key) do loc_id = UInt64(length(locations) + 1) # Extract info from the location frame (function_name, file_name) = string(frame.func), string(frame.file) - # Use a unique function id for the frame: - function_key = method_instance_id(frame) - # Decode the IP into information about this stack frame function_id = get!(funcs_map, function_key) do func_id = UInt64(length(functions) + 1) @@ -121,6 +132,14 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch end isempty(simple_name) && (simple_name = "[unknown function]") isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") + + if !PProf.should_keep_frame(from_c, skip_jl_dispatch, + skip_gc_internal, frame, simple_name) + success[] = false + push!(funcs_to_skip, function_key) + return 0 + end + # WEIRD TRICK: By entering a separate copy of the string (with a # different string id) for the name and system_name, pprof will use # the supplied `name` *verbatim*, without pruning off the arguments. @@ -131,6 +150,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch else name = enter!(simple_name) end + file = Base.find_source_file(file_name) file = file !== nothing ? file : file_name filename = enter!(file) @@ -139,6 +159,10 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch return func_id end + if !success[] + return 0 + end + push!( locations, Location(;id = loc_id, line=[Line(function_id, line_number)]) @@ -146,6 +170,8 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch return loc_id end + + return success[], loc end type_name_cache = Dict{Any,String}() @@ -158,17 +184,21 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch function construct_location_for_type(typename) # TODO: Lol something less hacky than this: - return maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0)) + _, loc = maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0)) + return loc end # convert the sample.stack to vector of location_ids @showprogress "Analyzing $(length(alloc_profile.allocs)) allocation samples..." for sample in alloc_profile.allocs # for each location in the sample.stack, if it's the first time seeing it, # we also enter that location into the locations table - location_ids = UInt64[ - maybe_add_location(frame) - for frame in sample.stacktrace if (!frame.from_c || from_c) - ] + location_ids = sizehint!(UInt64[], length(sample.stacktrace)) + for frame in sample.stacktrace + should_add, loc = maybe_add_location(frame) + if should_add + push!(location_ids, loc) + end + end if frame_for_type # Add location_id for the type: diff --git a/src/PProf.jl b/src/PProf.jl index e17fc6f..aa5734f 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -81,6 +81,16 @@ You can also use `PProf.refresh(file="...")` to open a new file in the server. - `keep_frames`: frames with function_name fully matching regexp string will be kept, even if it matches drop_functions. - `ui_relative_percentages`: Passes `-relative_percentages` to pprof. Causes nodes ignored/hidden through the web UI to be ignored from totals when computing percentages. +- `skip_jl_dispatch::Bool = false`: If true, we hide the internal frames from + julia dynamic dispatch: `jl_apply_generic`, `jl_apply`, `jl_invoke`, etc. This helps a lot + in the Graph view, because otherwise they imply a false recursion in your program when + there are multiple dynamic dispatches in your stacktrace. However, the dispatches can be + very useful in the flamegraph view, since they often represent performance problems. So + consider switching this based on which pprof UI view you're using. +- `skip_gc_internal::Bool = false`: Provided for consistency with PProf.Alloc.pprof(). + True by default, since this would only be present when running an Alloc profile at the + same time, and you may want to know the impact of that on your CPU profile. See + [`PProf.Alloc.pprof`](@ref). """ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, lidict::Union{Nothing, Dict} = nothing; @@ -94,7 +104,10 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, drop_frames::Union{Nothing, AbstractString} = nothing, keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, + skip_jl_dispatch::Bool = false, + skip_gc_internal::Bool = false, ) + log_greeting(skip_jl_dispatch) if data === nothing data = if isdefined(Profile, :has_meta) copy(Profile.fetch(include_meta = false)) @@ -127,10 +140,11 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Functions need a uid, we'll use the pointer for the method instance seen_funcs = Set{UInt64}() funcs = Dict{UInt64, Function}() + funcs_to_skip = Set{UInt64}() seen_locs = Set{UInt64}() locs = Dict{UInt64, Location}() - locs_from_c = Dict{UInt64, Bool}() + locs_to_skip = Set{UInt64}() samples = Vector{Sample}() sample_type = [ @@ -173,7 +187,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # if we have already seen this IP avoid decoding it again if ip in seen_locs # Only keep C frames if from_c=true - if (from_c || !locs_from_c[ip]) + if !(ip in locs_to_skip) push!(location_id, ip) end continue @@ -183,6 +197,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Decode the IP into information about this stack frame (or frames given inlining) location = Location(;id = ip, address = ip) location_from_c = true + should_keep_location = false # Will have multiple frames if frames were inlined (the last frame is the "real # function", the inlinee) for frame in lookup[ip] @@ -194,10 +209,13 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, # Use a unique function id for the frame: func_id = method_instance_id(frame) - push!(location.line, Line(function_id = func_id, line = frame.line)) # Known function - func_id in seen_funcs && continue + if func_id in seen_funcs && !(func_id in funcs_to_skip) + should_keep_location = true + push!(location.line, Line(function_id = func_id, line = frame.line)) + continue + end push!(seen_funcs, func_id) # Store the function in our functions dict @@ -220,6 +238,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, end isempty(simple_name) && (simple_name = "[unknown function]") isempty(full_name_with_args) && (full_name_with_args = "[unknown function]") + + # Only keep C functions if from_c=true, and it's not a skipped internal frame. + if !should_keep_frame(from_c, skip_jl_dispatch, + skip_gc_internal, frame, simple_name) + push!(funcs_to_skip, func_id) + continue + end + should_keep_location = true + # WEIRD TRICK: By entering a separate copy of the string (with a # different string id) for the name and system_name, pprof will use # the supplied `name` *verbatim*, without pruning off the arguments. @@ -232,16 +259,17 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, end file = Base.find_source_file(file) filename = enter!(file) - # Only keep C functions if from_c=true - if (from_c || !frame.from_c) - funcs[func_id] = Function(func_id, name, system_name, filename, start_line) - end + + # Save the new function to the Location + funcs[func_id] = Function(func_id, name, system_name, filename, start_line) + push!(location.line, Line(function_id = func_id, line = frame.line)) end - locs_from_c[ip] = location_from_c - # Only keep C frames if from_c=true - if (from_c || !location_from_c) + # If any of the frames were kept, save the location + if should_keep_location locs[ip] = location push!(location_id, ip) + else + push!(locs_to_skip, ip) end end @@ -272,6 +300,22 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, out end +log_once = false +function log_greeting(skip_jl_dispatch) + if !(log_once::Bool) + global log_once = true + if !skip_jl_dispatch + @info """👋 Welcome to PProf! + + Consider trying `skip_jl_dispatch = true` as well, for a cleaner Graph view. + The current options are best for the Flamegraph view. + + Now outputting your profile... + """ + end + end +end + function _escape_name_for_pprof(name) # HACK: Apparently proto doesn't escape func names with `"` in them ... >.< # TODO: Remove this hack after https://github.com/google/pprof/pull/564 @@ -291,6 +335,41 @@ function method_instance_id(frame) end end +function should_keep_frame(from_c, no_jl, no_gc, frame, name) + if ((from_c || !frame.from_c) + && !(no_jl && is_julia_dispatch_internal(name)) + && !(no_gc && is_gc_internal(name))) + return true + end + return false +end +function is_julia_dispatch_internal(name) + return name in ( + "jl_apply", + "_jl_apply", + "jl_invoke", + "_jl_invoke", + "jl_apply_generic", + "_jl_apply_generic", + "ijl_apply_generic", + ) +end +function is_gc_internal(name) + return name in ( + "jl_gc_alloc", + "jl_gc_alloc_", + "jl_gc_pool_alloc", + "jl_gc_big_alloc", + "jl_gc_alloc_typed", + "ijl_gc_pool_alloc", + "ijl_gc_big_alloc", + "ijl_gc_alloc_typed", + "ijl_gc_pool_alloc_instrumented", + "ijl_gc_big_alloc_instrumented", + "maybe_record_alloc_to_profile", + ) +end + """ refresh(; webhost = "localhost", webport = 57599, file = "profile.pb.gz", ui_relative_percentages = true) @@ -322,6 +401,8 @@ function refresh(; webhost::AbstractString = "localhost", proc[] = pprof_jll.pprof() do pprof_path open(pipeline(`$pprof_path -http=$webhost:$webport $relative_percentages_flag $file`)) end + + sleep(1) # Wait for the server to start. This is usually long enough. end """ diff --git a/test/Allocs.jl b/test/Allocs.jl index 0a9e683..4692d14 100644 --- a/test/Allocs.jl +++ b/test/Allocs.jl @@ -12,8 +12,8 @@ const out = tempname() Profile.Allocs.clear() Profile.Allocs.@profile sample_rate=1.0 begin # Profile compilation - @eval foo(x,y) = x * y + y / x - @eval foo(2, 3) + @eval foo_alloc(x,y) = x * y + x / y + @eval foo_alloc(1,3) end # Write the profile @@ -63,4 +63,34 @@ end @test float_loc !== nothing end +function load_prof_proto(file) + @show file + open(io->decode(ProtoDecoder(io), PProf.perftools.profiles.Profile), file, "r") +end + +@testset "skip_jl_dispatch" begin + Profile.Allocs.clear(); + Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + + @assert length(Profile.Allocs.fetch().allocs) > 0 + + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=false))) + @test !matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=true))) +end +@testset "skip_gc_internal" begin + Profile.Allocs.clear(); + Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1) + + @assert length(Profile.Allocs.fetch().allocs) > 0 + + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false))) + @test !matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true))) +end + + + end # module PProfAllocsTest diff --git a/test/PProf.jl b/test/PProf.jl index 180a72d..8fefeed 100644 --- a/test/PProf.jl +++ b/test/PProf.jl @@ -28,8 +28,8 @@ end @profile for i in 1:10000 # Profile compilation - @eval foo(x,y) = x * y + x / y - @eval foo($i,3) + @eval $(Symbol("foo$i"))(x,y) = x * y + x / y + @eval $(Symbol("foo$i"))($i,3) end # Cache profile output to test that it isn't changed @@ -98,6 +98,15 @@ end @test load_prof_proto(pprof(out=tempname(), web=false, keep_frames = "foo")).keep_frames != 0 end +@testset "skip_jl_dispatch" begin + Profile.clear() + @profile Base.inferencebarrier(foo)(1000000, 5, []) + args = (; out=tempname(), web=false) + matches(r, proto) = any(s->occursin(r, s), proto.string_table) + @test matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=false))) + @test !matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=true))) +end + @testset "@pprof macro" begin @pprof foo(10000, 5, [])