diff --git a/cmd/symbolization/main.go b/cmd/symbolization/main.go new file mode 100644 index 0000000000..a8cdf65bd0 --- /dev/null +++ b/cmd/symbolization/main.go @@ -0,0 +1,108 @@ +package main + +import ( + "context" + "fmt" + "log" + + pprof "github.com/google/pprof/profile" + + "github.com/grafana/pyroscope/pkg/experiment/symbolization" +) + +const ( + debuginfodBaseURL = "https://debuginfod.elfutils.org" + buildID = "2fa2055ef20fabc972d5751147e093275514b142" +) + +func main() { + client := symbolization.NewDebuginfodClient(debuginfodBaseURL) + + // Alternatively, use a local debug info file: + //client := &localDebuginfodClient{debugFilePath: "/path/to/your/debug/file"} + + symbolizer := symbolization.NewSymbolizer(client) + ctx := context.Background() + + _, err := client.FetchDebuginfo(buildID) + if err != nil { + log.Fatalf("Failed to fetch debug info: %v", err) + } + //defer os.Remove(debugFilePath) + + // Create a request to symbolize specific addresses + req := symbolization.Request{ + BuildID: buildID, + Mappings: []symbolization.RequestMapping{ + { + Locations: []*symbolization.Location{ + { + Address: 0x1500, + Mapping: &pprof.Mapping{}, + }, + { + Address: 0x3c5a, + Mapping: &pprof.Mapping{}, + }, + { + Address: 0x2745, + Mapping: &pprof.Mapping{}, + }, + }, + }, + }, + } + + if err := symbolizer.Symbolize(ctx, req); err != nil { + log.Fatalf("Failed to symbolize: %v", err) + } + + fmt.Println("Symbolization Results:") + fmt.Printf("Build ID: %s\n", buildID) + fmt.Println("----------------------------------------") + + for i, mapping := range req.Mappings { + fmt.Printf("Mapping #%d:\n", i+1) + for _, loc := range mapping.Locations { + fmt.Printf("\nAddress: 0x%x\n", loc.Address) + if len(loc.Lines) == 0 { + fmt.Println(" No symbolization information found") + continue + } + + for j, line := range loc.Lines { + fmt.Printf(" Line %d:\n", j+1) + if line.Function != nil { + fmt.Printf(" Function: %s\n", line.Function.Name) + fmt.Printf(" File: %s\n", line.Function.Filename) + fmt.Printf(" Line: %d\n", line.Line) + fmt.Printf(" StartLine: %d\n", line.Function.StartLine) + } else { + fmt.Println(" No function information available") + } + } + fmt.Println("----------------------------------------") + } + } + + // Alternatively: Symbolize all addresses in the binary + // Note: Comment out the above specific symbolization when using this + // as it's a different approach meant for exploring all available symbols + //if err := symbolizer.SymbolizeAll(ctx, buildID); err != nil { + // log.Fatalf("Failed to symbolize all addresses: %v", err) + //} + + fmt.Println("\nSymbolization completed successfully.") +} + +// localDebuginfodClient provides a way to use local debug info files instead of fetching from a server +// +//nolint:all +type localDebuginfodClient struct { + debugFilePath string +} + +//nolint:all +func (c *localDebuginfodClient) FetchDebuginfo(buildID string) (string, error) { + return c.debugFilePath, nil +} diff --git a/go.mod b/go.mod index 837123123e..bdf48ec153 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93 github.com/felixge/httpsnoop v1.0.4 github.com/fsnotify/fsnotify v1.7.0 + github.com/go-delve/delve v1.23.1 github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/gogo/status v1.1.1 diff --git a/go.sum b/go.sum index 7daa99e211..f700e8255e 100644 --- a/go.sum +++ b/go.sum @@ -241,6 +241,8 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fullstorydev/emulators/storage v0.0.0-20240401123056-edc69752f474 h1:TufioMBjkJ6/Oqmlye/ReuxHFS35HyLmypj/BNy/8GY= github.com/fullstorydev/emulators/storage v0.0.0-20240401123056-edc69752f474/go.mod h1:PQwxF4UU8wuL+srGxr3BOhIW5zXqgucwVlO/nPZLsxw= +github.com/go-delve/delve v1.23.1 h1:MtZ13ppptttkqSuvVnwJ5CPhIAzDiOwRrYuCk3ES7fU= +github.com/go-delve/delve v1.23.1/go.mod h1:S3SLuEE2mn7wipKilTvk1p9HdTMnXXElcEpiZ+VcuqU= github.com/go-fonts/dejavu v0.3.4 h1:Qqyx9IOs5CQFxyWTdvddeWzrX0VNwUAvbmAzL0fpjbc= github.com/go-fonts/dejavu v0.3.4/go.mod h1:D1z0DglIz+lmpeNYMYlxW4r22IhcdOYnt+R3PShU/Kg= github.com/go-fonts/latin-modern v0.3.3 h1:g2xNgI8yzdNzIVm+qvbMryB6yGPe0pSMss8QT3QwlJ0= diff --git a/go.work.sum b/go.work.sum index 1ff838dea0..78f4d2eee1 100644 --- a/go.work.sum +++ b/go.work.sum @@ -628,13 +628,21 @@ github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3 github.com/coreos/go-oidc/v3 v3.5.0 h1:VxKtbccHZxs8juq7RdJntSqtXFtde9YpNpGn0yqgEHw= github.com/coreos/go-oidc/v3 v3.5.0/go.mod h1:ecXRtV4romGPeO6ieExAsUK9cb/3fp9hXNz1tlv8PIM= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cosiner/argv v0.1.0 h1:BVDiEL32lwHukgJKP87btEPenzrrHUjajs/8yzaqcXg= +github.com/cosiner/argv v0.1.0/go.mod h1:EusR6TucWKX+zFgtdUsKT2Cvg45K5rtpCcWz4hK06d8= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9 h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.20 h1:VIPb/a2s17qNeQgDnkfZC35RScx+blkKF8GV68n80J4= +github.com/creack/pty v1.1.20/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/cristalhq/hedgedhttp v0.9.1 h1:g68L9cf8uUyQKQJwciD0A1Vgbsz+QgCjuB1I8FAsCDs= github.com/cristalhq/hedgedhttp v0.9.1/go.mod h1:XkqWU6qVMutbhW68NnzjWrGtH8NUx1UfYqGYtHVKIsI= github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892 h1:qg9VbHo1TlL0KDM0vYvBG9EY0X0Yku5WYIPoFWt8f6o= github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892/go.mod h1:CTDl0pzVzE5DEzZhPfvhY/9sPFMQIxaJ9VAMs9AagrE= +github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU= +github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/digitalocean/godo v1.104.1/go.mod h1:VAI/L5YDzMuPRU01lEEUSQ/sp5Z//1HnnFv/RBTEdbg= @@ -658,6 +666,8 @@ github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2 github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= +github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62 h1:IGtvsNyIuRjl04XAOFGACozgUD7A82UffYxZt4DWbvA= +github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62/go.mod h1:biJCRbqp51wS+I92HMqn5H8/A0PAhxn2vyOT+JqhiGI= github.com/go-fonts/stix v0.2.2 h1:v9krocr13J1llaOHLEol1eaHsv8S43UuFX/1bFgEJJ4= github.com/go-fonts/stix v0.2.2/go.mod h1:SUxggC9dxd/Q+rb5PkJuvfvTbOPtNc2Qaua00fIp9iU= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1 h1:QbL/5oDUmRBzO9/Z7Seo6zf912W/a6Sr4Eu0G/3Jho0= @@ -739,6 +749,8 @@ github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9 github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-dap v0.12.0 h1:rVcjv3SyMIrpaOoTAdFDyHs99CwVOItIJGKLQFQhNeM= +github.com/google/go-dap v0.12.0/go.mod h1:tNjCASCm5cqePi/RVXXWEVqtnNLV1KTWtYOqu6rZNzc= github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9 h1:OF1IPgv+F4NmqmJ98KTjdN97Vs1JxDPB3vbmYzV2dpk= github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9/go.mod h1:6eQoGcuNJpa7jnd5pMGdkSaQpNDYvPlXWMcjXXThLlY= github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= @@ -793,6 +805,8 @@ github.com/hudl/fargo v1.4.0/go.mod h1:9Ai6uvFy5fQNq6VPKtg+Ceq1+eTY4nKUlR2JElEOc github.com/iancoleman/strcase v0.2.0 h1:05I4QRnGpI0m37iZQRuskXh+w77mr6Z41lwQzuHLwW0= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab h1:HqW4xhhynfjrtEiiSGcQUd6vrK23iMam1FO8rI7mwig= github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ionos-cloud/sdk-go/v6 v6.1.9/go.mod h1:EzEgRIDxBELvfoa/uBN0kOQaqovLjUWEB7iW4/Q+t4k= @@ -963,6 +977,8 @@ github.com/rs/cors v1.10.1 h1:L0uuZVXIKlI1SShY2nhFfo44TYvDPQ1w4oFkUJNfhyo= github.com/rs/cors v1.10.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245 h1:K1Xf3bKttbF+koVGaX5xngRIZ5bVjbmPnaxE/dR08uY= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/ryanuber/columnize v2.1.0+incompatible h1:j1Wcmh8OrK4Q7GXY+V7SVSY8nUWQxHW5TkBe7YUl+2s= @@ -993,6 +1009,8 @@ github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= @@ -1082,9 +1100,13 @@ go.opentelemetry.io/otel/trace v1.17.0/go.mod h1:I/4vKTgFclIsXRVucpH25X0mpFSczM7 go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= go.opentelemetry.io/otel/trace v1.22.0/go.mod h1:RbbHXVqKES9QhzZq/fE5UnOSILqRt40a21sPw2He1xo= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.starlark.net v0.0.0-20231101134539-556fd59b42f6 h1:+eC0F/k4aBLC4szgOcjd7bDTEnpxADJyWJE0yowgM3E= +go.starlark.net v0.0.0-20231101134539-556fd59b42f6/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +golang.org/x/arch v0.6.0 h1:S0JTfE48HbRj80+4tbvZDYsJ3tGv6BUU3XxyZ7CirAc= +golang.org/x/arch v0.6.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20200414173820-0848c9571904/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= diff --git a/pkg/experiment/symbolization/addrmapper.go b/pkg/experiment/symbolization/addrmapper.go new file mode 100644 index 0000000000..3fc020e9ac --- /dev/null +++ b/pkg/experiment/symbolization/addrmapper.go @@ -0,0 +1,190 @@ +package symbolization + +import ( + "debug/elf" + "fmt" +) + +// BinaryLayout contains the information needed to translate between runtime addresses +// and the addresses in the debug information. This is necessary because: +// 1. Executables (ET_EXEC) use fixed addresses but may need segment offset adjustments +// 2. Shared libraries (ET_DYN) can be loaded at any address, requiring base address calculations +// 3. Relocatable files (ET_REL) need special handling for their relocations +type BinaryLayout struct { + ElfType uint16 + ProgramHeaders []MemoryRegion +} + +// MemoryRegion represents a loadable segment in the ELF file. +// These segments define how the program should be loaded into memory: +// - Off: where the segment data starts in the file +// - Vaddr: the virtual address where the segment should be loaded +// - Memsz: how much memory the segment occupies when loaded +type MemoryRegion struct { + Off uint64 // File offset + Vaddr uint64 // Virtual address + Filesz uint64 // Size in file + Memsz uint64 // Size in memory (may be larger than Filesz due to .bss) + Type uint32 +} + +func ExecutableInfoFromELF(f *elf.File) (*BinaryLayout, error) { + loadableSegments := make([]MemoryRegion, 0, len(f.Progs)) + for _, segment := range f.Progs { + if segment.Type == elf.PT_LOAD { + loadableSegments = append(loadableSegments, MemoryRegion{ + Off: segment.Off, + Vaddr: segment.Vaddr, + Filesz: segment.Filesz, + Memsz: segment.Memsz, + Type: uint32(segment.Type), + }) + } + } + + return &BinaryLayout{ + ElfType: uint16(f.Type), + ProgramHeaders: loadableSegments, + }, nil +} + +// MapRuntimeAddress translates a runtime address to its corresponding address +// in the debug information. This translation is necessary because: +// - The program might be loaded at a different address than it was linked for +// - Different segments might need different adjustments +// - Various ELF types (EXEC, DYN, REL) handle addressing differently +func MapRuntimeAddress(runtimeAddr uint64, ei *BinaryLayout, m Mapping) (uint64, error) { + baseOffset, err := CalculateBase(ei, m, runtimeAddr) + if err != nil { + return runtimeAddr, fmt.Errorf("calculate base offset: %w", err) + } + + return runtimeAddr - baseOffset, nil +} + +// CalculateBase determines the base address adjustment needed for address translation. +// The calculation varies depending on the ELF type: +// - ET_EXEC: Uses fixed addresses with potential segment adjustments +// - ET_DYN: Can be loaded anywhere, needs runtime base address adjustment +// - ET_REL: Requires relocation processing +func CalculateBase(ei *BinaryLayout, m Mapping, addr uint64) (uint64, error) { + segment, err := ei.FindProgramHeader(m, addr) + if err != nil { + return 0, fmt.Errorf("find program segment: %w", err) + } + + if segment == nil { + return 0, nil + } + + // Handle special case where mapping spans entire address space + if m.Start == 0 && m.Offset == 0 && (m.Limit == ^uint64(0) || m.Limit == 0) { + return 0, nil + } + + switch elf.Type(ei.ElfType) { + case elf.ET_EXEC: + return calculateExecBase(m, segment) + case elf.ET_REL: + return calculateRelocatableBase(m) + case elf.ET_DYN: + return calculateDynamicBase(m, segment) + } + + return 0, fmt.Errorf("unsupported ELF type: %v", elf.Type(ei.ElfType)) +} + +// FindProgramHeader finds the program header containing the given address. +// It returns nil if no header is found. +func (ei *BinaryLayout) FindProgramHeader(m Mapping, addr uint64) (*MemoryRegion, error) { + // Special case: if mapping is empty (all zeros), just look for any header containing the address + if m.Start == 0 && m.Limit == 0 { + for i := range ei.ProgramHeaders { + h := &ei.ProgramHeaders[i] + if h.Type == uint32(elf.PT_LOAD) { + if h.Vaddr <= addr && addr < h.Vaddr+h.Memsz { + return h, nil + } + } + } + return nil, nil + } + + // Fast path: if address is invalid or outside reasonable range + if m.Start >= m.Limit { + return nil, fmt.Errorf("invalid mapping range: start %x >= limit %x", m.Start, m.Limit) + } + + // Special case: kernel addresses or very high addresses + if m.Limit >= (1 << 63) { + return nil, nil + } + + // No loadable segments + if len(ei.ProgramHeaders) == 0 { + return nil, nil + } + + // Calculate file offset from the address + fileOffset := addr - m.Start + m.Offset + + // Find all headers that could contain this address + var candidateHeaders []*MemoryRegion + for i := range ei.ProgramHeaders { + h := &ei.ProgramHeaders[i] + if h.Type != uint32(elf.PT_LOAD) { + continue + } + + // Check if the file offset falls within this segment + if fileOffset >= h.Off && fileOffset < h.Off+h.Memsz { + candidateHeaders = append(candidateHeaders, h) + } + } + + // No matching headers found + if len(candidateHeaders) == 0 { + return nil, nil + } + + // If only one header matches, return it + if len(candidateHeaders) == 1 { + return candidateHeaders[0], nil + } + + // Multiple headers - need to select the most appropriate one + // Choose the one with the closest starting address to our target + var bestHeader *MemoryRegion + bestDistance := uint64(^uint64(0)) // Max uint64 as initial distance + + for _, h := range candidateHeaders { + distance := addr - h.Vaddr + if distance < bestDistance { + bestDistance = distance + bestHeader = h + } + } + + return bestHeader, nil +} + +func calculateExecBase(m Mapping, h *MemoryRegion) (uint64, error) { + if h == nil { + return 0, nil + } + return m.Start - m.Offset + h.Off - h.Vaddr, nil +} + +func calculateRelocatableBase(m Mapping) (uint64, error) { + if m.Offset != 0 { + return 0, fmt.Errorf("relocatable files with non-zero offset not supported") + } + return m.Start, nil +} + +func calculateDynamicBase(m Mapping, h *MemoryRegion) (uint64, error) { + if h == nil { + return m.Start - m.Offset, nil + } + return m.Start - m.Offset + h.Off - h.Vaddr, nil +} diff --git a/pkg/experiment/symbolization/debuginfod_client.go b/pkg/experiment/symbolization/debuginfod_client.go new file mode 100644 index 0000000000..5318db1488 --- /dev/null +++ b/pkg/experiment/symbolization/debuginfod_client.go @@ -0,0 +1,54 @@ +package symbolization + +import ( + "fmt" + "io" + "net/http" + "os" + "path/filepath" +) + +type DebuginfodClient interface { + FetchDebuginfo(buildID string) (string, error) +} + +type debuginfodClient struct { + baseURL string +} + +func NewDebuginfodClient(baseURL string) DebuginfodClient { + return &debuginfodClient{ + baseURL: baseURL, + } +} + +// FetchDebuginfo fetches the debuginfo file for a specific build ID. +func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) { + url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.baseURL, buildID) + + resp, err := http.Get(url) + if err != nil { + return "", fmt.Errorf("failed to fetch debuginfod: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("unexpected HTTP status: %s", resp.Status) + } + + // Save the debuginfo to a temporary file + tempDir := os.TempDir() + filePath := filepath.Join(tempDir, fmt.Sprintf("%s.elf", buildID)) + outFile, err := os.Create(filePath) + if err != nil { + return "", fmt.Errorf("failed to create temp file: %w", err) + } + defer outFile.Close() + + _, err = io.Copy(outFile, resp.Body) + if err != nil { + return "", fmt.Errorf("failed to write debuginfod to file: %w", err) + } + + return filePath, nil +} diff --git a/pkg/experiment/symbolization/dwarf.go b/pkg/experiment/symbolization/dwarf.go new file mode 100644 index 0000000000..e2fb172c8e --- /dev/null +++ b/pkg/experiment/symbolization/dwarf.go @@ -0,0 +1,373 @@ +package symbolization + +import ( + "context" + "debug/dwarf" + "errors" + "fmt" + "io" + "sort" + + "github.com/go-delve/delve/pkg/dwarf/godwarf" + "github.com/go-delve/delve/pkg/dwarf/reader" + pprof "github.com/google/pprof/profile" +) + +// DWARFInfo implements the liner interface +type DWARFInfo struct { + debugData *dwarf.Data + lineEntries map[dwarf.Offset][]dwarf.LineEntry + subprograms map[dwarf.Offset][]*godwarf.Tree + abstractSubprograms map[dwarf.Offset]*dwarf.Entry +} + +// NewDWARFInfo creates a new liner using DWARF debug info +func NewDWARFInfo(debugData *dwarf.Data) *DWARFInfo { + return &DWARFInfo{ + debugData: debugData, + lineEntries: make(map[dwarf.Offset][]dwarf.LineEntry), + subprograms: make(map[dwarf.Offset][]*godwarf.Tree), + abstractSubprograms: make(map[dwarf.Offset]*dwarf.Entry), + } +} + +func (d *DWARFInfo) ResolveAddress(_ context.Context, addr uint64) ([]SymbolLocation, error) { + er := reader.New(d.debugData) + cu, err := er.SeekPC(addr) + if err != nil { + return nil, fmt.Errorf("no symbol information found for address 0x%x", addr) + } + if cu == nil { + return nil, errors.New("no symbol information found for address") + } + + if err := d.buildLookupTables(cu); err != nil { + return nil, err + } + + var lines []SymbolLocation + var targetTree *godwarf.Tree + for _, tree := range d.subprograms[cu.Offset] { + if tree.ContainsPC(addr) { + targetTree = tree + break + } + } + + if targetTree == nil { + return lines, nil + } + + functionName, ok := targetTree.Entry.Val(dwarf.AttrName).(string) + if !ok { + functionName = "" + } + + declLine, ok := targetTree.Entry.Val(dwarf.AttrDeclLine).(int64) + if !ok { + declLine = 0 + } + + file, line := d.findLineInfo(d.lineEntries[cu.Offset], targetTree.Ranges) + lines = append(lines, SymbolLocation{ + Function: &pprof.Function{ + Name: functionName, + Filename: file, + StartLine: declLine, + }, + Line: line, + }) + + // Enhanced inline function processing + for _, tr := range reader.InlineStack(targetTree, addr) { + + var functionName string + if tr.Tag == dwarf.TagSubprogram { + functionName, ok = targetTree.Entry.Val(dwarf.AttrName).(string) + if !ok { + functionName = "" + } + } else { + if abstractOffset, ok := tr.Entry.Val(dwarf.AttrAbstractOrigin).(dwarf.Offset); ok { + if abstractOrigin, exists := d.abstractSubprograms[abstractOffset]; exists { + functionName = d.getFunctionName(abstractOrigin) + } else { + functionName = "?" + } + } else { + functionName = "?" + } + } + + declLine, ok := tr.Entry.Val(dwarf.AttrDeclLine).(int64) + if !ok { + declLine = 0 + } + + file, line := d.findLineInfo(d.lineEntries[cu.Offset], tr.Ranges) + + lines = append(lines, SymbolLocation{ + Function: &pprof.Function{ + Name: functionName, + Filename: file, + StartLine: declLine, + }, + Line: line, + }) + } + + return lines, nil +} + +func (d *DWARFInfo) resolveFunctionName(entry *dwarf.Entry) string { + if entry == nil { + return "?" + } + + if name, ok := entry.Val(dwarf.AttrName).(string); ok { + return name + } + if name, ok := entry.Val(dwarf.AttrLinkageName).(string); ok { + return name + } + + return "?" +} + +func (d *DWARFInfo) buildLookupTables(cu *dwarf.Entry) error { + // Check if we already processed this compilation unit + if _, exists := d.lineEntries[cu.Offset]; exists { + return nil + } + + // TODO: not 100% sure about it. Review it. + // Scan all DWARF entries for abstract subprograms before processing this compilation unit. + // This scan is necessary because DWARF debug info can contain cross-compilation unit + // references, particularly for inlined functions. When a function is inlined, its + // definition (the abstract entry) may be in one compilation unit while its usage + // (via AttrAbstractOrigin) can be in another. By scanning all entries upfront, + // we ensure we can resolve these cross-unit references when they occur. + // + // For example, when a C++ standard library function is inlined (like printf from stdio.h), + // its abstract entry might be in the compilation unit for stdio.h, but we need to + // resolve its name when we find it inlined in our program's compilation unit. + if len(d.abstractSubprograms) == 0 { + if err := d.scanAbstractSubprograms(); err != nil { + return fmt.Errorf("scan abstract subprograms: %w", err) + } + } + + // Process line entries first + if err := d.processLineEntries(cu); err != nil { + return fmt.Errorf("process line entries: %w", err) + } + + // Process subprograms and their trees + if err := d.processSubprogramEntries(cu); err != nil { + return fmt.Errorf("process subprogram entries: %w", err) + } + + return nil +} + +func (d *DWARFInfo) processLineEntries(cu *dwarf.Entry) error { + lr, err := d.debugData.LineReader(cu) + if err != nil { + return fmt.Errorf("create line reader: %w", err) + } + if lr == nil { + return errors.New("no line reader available") + } + + entries := make([]dwarf.LineEntry, 0) + for { + var entry dwarf.LineEntry + err := lr.Next(&entry) + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("read line entry: %w", err) + } + + // Only store statement entries + if entry.IsStmt { + entries = append(entries, entry) + } + } + + d.lineEntries[cu.Offset] = entries + return nil +} + +func (d *DWARFInfo) processSubprogramEntries(cu *dwarf.Entry) error { + reader := d.debugData.Reader() + reader.Seek(cu.Offset) + + entry, err := reader.Next() + if err != nil { + return fmt.Errorf("read initial entry: %w", err) + } + if entry == nil || entry.Tag != dwarf.TagCompileUnit { + return fmt.Errorf("unexpected entry type at CU offset: %v", cu.Offset) + } + + subprograms := make([]*godwarf.Tree, 0) + for { + entry, err := reader.Next() + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("read entry: %w", err) + } + if entry == nil || entry.Tag == dwarf.TagCompileUnit { + break + } + + if entry.Tag != dwarf.TagSubprogram { + continue + } + + // Check for abstract entries first + isAbstract := false + for _, field := range entry.Field { + if field.Attr == dwarf.AttrInline { + d.abstractSubprograms[entry.Offset] = entry + isAbstract = true + break + } + } + + //Skip if this was an abstract entry + if isAbstract { + continue + } + + // Extract the subprogram tree + tree, err := godwarf.LoadTree(entry.Offset, d.debugData, 0) + if err != nil { + return fmt.Errorf("load subprogram tree: %w", err) + } + + subprograms = append(subprograms, tree) + } + + d.subprograms[cu.Offset] = subprograms + return nil +} + +func (d *DWARFInfo) findLineInfo(entries []dwarf.LineEntry, ranges [][2]uint64) (string, int64) { + sort.Slice(entries, func(i, j int) bool { + return entries[i].Address < entries[j].Address + }) + + // Try to find an entry that contains our target address + targetAddr := ranges[0][0] + for _, entry := range entries { + if entry.Address >= targetAddr && entry.Address < ranges[0][1] { + if entry.File != nil { + return entry.File.Name, int64(entry.Line) + } + } + } + + // Find the closest entry before our target address + var lastEntry *dwarf.LineEntry + for i := range entries { + if entries[i].Address > targetAddr { + break + } + lastEntry = &entries[i] + } + + if lastEntry != nil && lastEntry.File != nil { + return lastEntry.File.Name, int64(lastEntry.Line) + } + + return "?", 0 +} + +func (d *DWARFInfo) getFunctionName(entry *dwarf.Entry) string { + name := "?" + ok := false + if entry != nil { + for _, field := range entry.Field { + if field.Attr == dwarf.AttrName { + name, ok = field.Val.(string) + if !ok { + name = "?" + } + } + } + } + return name +} + +func (d *DWARFInfo) SymbolizeAllAddresses() map[uint64][]SymbolLocation { + results := make(map[uint64][]SymbolLocation) + + // Get all compilation units + reader := d.debugData.Reader() + for { + entry, err := reader.Next() + if err != nil || entry == nil { + break + } + + if entry.Tag != dwarf.TagCompileUnit { + continue + } + + // Get ranges for this compilation unit + ranges, err := d.debugData.Ranges(entry) + if err != nil { + fmt.Printf("Warning: Failed to get ranges for CU: %v\n", err) + continue + } + + for _, rng := range ranges { + // Skip invalid ranges + if rng[0] >= rng[1] { + continue + } + + // Sample multiple points in this range + addresses := []uint64{ + rng[0], // start + rng[0] + (rng[1]-rng[0])/2, // middle + rng[1] - 1, // end (exclusive) + } + + for _, addr := range addresses { + lines, err := d.ResolveAddress(context.Background(), addr) + if err != nil { + continue + } + + if len(lines) > 0 { + results[addr] = lines + } + } + } + } + + return results +} + +func (d *DWARFInfo) scanAbstractSubprograms() error { + reader := d.debugData.Reader() + // Scan from the start, don't stop at first CU + for { + entry, err := reader.Next() + if err != nil || entry == nil { + break + } + + if entry.Tag == dwarf.TagSubprogram { + // Store ALL subprograms, not just inline ones + d.abstractSubprograms[entry.Offset] = entry + } + } + return nil +} diff --git a/pkg/experiment/symbolization/symbolizer.go b/pkg/experiment/symbolization/symbolizer.go new file mode 100644 index 0000000000..7f9d4fab32 --- /dev/null +++ b/pkg/experiment/symbolization/symbolizer.go @@ -0,0 +1,139 @@ +package symbolization + +import ( + "context" + "debug/dwarf" + "debug/elf" + "fmt" +) + +// DwarfResolver implements the liner interface +type DwarfResolver struct { + debugData *dwarf.Data + dbgFile *DWARFInfo + file *elf.File +} + +func NewDwarfResolver(f *elf.File) (SymbolResolver, error) { + debugData, err := f.DWARF() + if err != nil { + return nil, fmt.Errorf("read DWARF data: %w", err) + } + + debugInfo := NewDWARFInfo(debugData) + + return &DwarfResolver{ + debugData: debugData, + dbgFile: debugInfo, + file: f, + }, nil +} + +func (d *DwarfResolver) ResolveAddress(ctx context.Context, pc uint64) ([]SymbolLocation, error) { + return d.dbgFile.ResolveAddress(ctx, pc) +} + +func (d *DwarfResolver) Close() error { + return d.file.Close() +} + +type Symbolizer struct { + client DebuginfodClient +} + +func NewSymbolizer(client DebuginfodClient) *Symbolizer { + return &Symbolizer{ + client: client, + } +} + +func (s *Symbolizer) Symbolize(ctx context.Context, req Request) error { + // Fetch debug info file + debugFilePath, err := s.client.FetchDebuginfo(req.BuildID) + if err != nil { + return fmt.Errorf("fetch debuginfo: %w", err) + } + + // Open ELF file + f, err := elf.Open(debugFilePath) + if err != nil { + return fmt.Errorf("open ELF file: %w", err) + } + defer f.Close() + + // Get executable info for address normalization + ei, err := ExecutableInfoFromELF(f) + if err != nil { + return fmt.Errorf("executable info from ELF: %w", err) + } + + // Create liner + liner, err := NewDwarfResolver(f) + if err != nil { + return fmt.Errorf("create liner: %w", err) + } + //defer liner.Close() + + // Process each mapping's locations + for _, mapping := range req.Mappings { + for _, loc := range mapping.Locations { + addr, err := MapRuntimeAddress(loc.Address, ei, Mapping{ + Start: loc.Mapping.Start, + Limit: loc.Mapping.Limit, + Offset: loc.Mapping.Offset, + }) + if err != nil { + return fmt.Errorf("normalize address: %w", err) + } + + // Get source lines for the address + lines, err := liner.ResolveAddress(ctx, addr) + if err != nil { + continue // Skip errors for individual addresses + } + + // Update the location directly (this is why Parca modifies the request - it's updating the shared locations) + loc.Lines = lines + } + } + + return nil +} + +func (s *Symbolizer) SymbolizeAll(ctx context.Context, buildID string) error { + // Reuse the existing debuginfo file + debugFilePath, err := s.client.FetchDebuginfo(buildID) + if err != nil { + return fmt.Errorf("fetch debuginfo: %w", err) + } + + f, err := elf.Open(debugFilePath) + if err != nil { + return fmt.Errorf("open ELF file: %w", err) + } + defer f.Close() + + debugData, err := f.DWARF() + if err != nil { + return fmt.Errorf("get DWARF data: %w", err) + } + + debugInfo := NewDWARFInfo(debugData) + allSymbols := debugInfo.SymbolizeAllAddresses() + + fmt.Println("\nSymbolizing all addresses in DWARF file:") + fmt.Println("----------------------------------------") + + for addr, lines := range allSymbols { + fmt.Printf("\nAddress: 0x%x\n", addr) + for _, line := range lines { + fmt.Printf(" Function: %s\n", line.Function.Name) + fmt.Printf(" File: %s\n", line.Function.Filename) + fmt.Printf(" Line: %d\n", line.Line) + fmt.Printf(" StartLine: %d\n", line.Function.StartLine) + fmt.Println("----------------------------------------") + } + } + + return nil +} diff --git a/pkg/experiment/symbolization/types.go b/pkg/experiment/symbolization/types.go new file mode 100644 index 0000000000..d81471d0ec --- /dev/null +++ b/pkg/experiment/symbolization/types.go @@ -0,0 +1,45 @@ +package symbolization + +import ( + "context" + + pprof "github.com/google/pprof/profile" +) + +// SymbolLocation represents a resolved source code location with function information +type SymbolLocation struct { + Function *pprof.Function + Line int64 +} + +// Location represents a memory address to be symbolized +type Location struct { + ID string + Address uint64 + Lines []SymbolLocation + Mapping *pprof.Mapping +} + +// Request represents a symbolization request for multiple addresses +type Request struct { + BuildID string + Mappings []RequestMapping +} + +type RequestMapping struct { + Locations []*Location +} + +// Mapping describes how a binary section is mapped in memory +type Mapping struct { + Start uint64 + End uint64 + Limit uint64 + Offset uint64 +} + +// SymbolResolver converts memory addresses to source code locations +type SymbolResolver interface { + ResolveAddress(ctx context.Context, addr uint64) ([]SymbolLocation, error) + //Close() error +}