From 0c5aa964b05cd96bb6a052a67afa8be7779acb83 Mon Sep 17 00:00:00 2001 From: Razakhel Date: Sun, 19 Jan 2025 10:57:55 +0100 Subject: [PATCH] [Extern/Tracy] Updated Tracy to 0.11.1 --- extern/tracy/LICENSE | 2 +- extern/tracy/TracyClient.cpp | 31 +- extern/tracy/client/TracyArmCpuTable.hpp | 18 + extern/tracy/client/TracyCallstack.cpp | 533 +++++++++++++++++++---- extern/tracy/client/TracyCallstack.hpp | 27 +- extern/tracy/client/TracyKCore.cpp | 121 +++++ extern/tracy/client/TracyKCore.hpp | 37 ++ extern/tracy/client/TracyProfiler.cpp | 475 +++++++++++++++++--- extern/tracy/client/TracyProfiler.hpp | 36 +- extern/tracy/client/TracyScoped.hpp | 61 ++- extern/tracy/client/TracySysTrace.cpp | 19 +- extern/tracy/client/tracy_rpmalloc.cpp | 5 +- extern/tracy/common/TracyProtocol.hpp | 4 +- extern/tracy/common/TracyQueue.hpp | 21 + extern/tracy/common/TracySocket.cpp | 3 + extern/tracy/common/TracySystem.cpp | 57 ++- extern/tracy/common/TracySystem.hpp | 13 +- extern/tracy/common/TracyVersion.hpp | 4 +- extern/tracy/common/tracy_lz4.cpp | 4 +- extern/tracy/libbacktrace/dwarf.cpp | 43 +- extern/tracy/libbacktrace/elf.cpp | 198 +++++++-- extern/tracy/libbacktrace/fileline.cpp | 75 +++- extern/tracy/libbacktrace/internal.hpp | 7 + extern/tracy/tracy/Tracy.hpp | 19 +- extern/tracy/tracy/TracyC.h | 51 ++- extern/tracy/tracy/TracyD3D11.hpp | 2 +- extern/tracy/tracy/TracyLua.hpp | 23 +- extern/tracy/tracy/TracyOpenCL.hpp | 4 +- extern/tracy/tracy/TracyVulkan.hpp | 40 +- 29 files changed, 1622 insertions(+), 311 deletions(-) create mode 100644 extern/tracy/client/TracyKCore.cpp create mode 100644 extern/tracy/client/TracyKCore.hpp diff --git a/extern/tracy/LICENSE b/extern/tracy/LICENSE index 72a6fe1c..25a4f18d 100644 --- a/extern/tracy/LICENSE +++ b/extern/tracy/LICENSE @@ -1,7 +1,7 @@ Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the 3-clause BSD license. 
-Copyright (c) 2017-2023, Bartosz Taudul +Copyright (c) 2017-2024, Bartosz Taudul All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/extern/tracy/TracyClient.cpp b/extern/tracy/TracyClient.cpp index 26387b76..6224f48b 100644 --- a/extern/tracy/TracyClient.cpp +++ b/extern/tracy/TracyClient.cpp @@ -30,21 +30,24 @@ #include "client/TracyDxt1.cpp" #include "client/TracyAlloc.cpp" #include "client/TracyOverride.cpp" - -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -# include "libbacktrace/alloc.cpp" -# include "libbacktrace/dwarf.cpp" -# include "libbacktrace/fileline.cpp" -# include "libbacktrace/mmapio.cpp" -# include "libbacktrace/posix.cpp" -# include "libbacktrace/sort.cpp" -# include "libbacktrace/state.cpp" -# if TRACY_HAS_CALLSTACK == 4 -# include "libbacktrace/macho.cpp" -# else -# include "libbacktrace/elf.cpp" +#include "client/TracyKCore.cpp" + +#if defined(TRACY_HAS_CALLSTACK) +# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 +# include "libbacktrace/alloc.cpp" +# include "libbacktrace/dwarf.cpp" +# include "libbacktrace/fileline.cpp" +# include "libbacktrace/mmapio.cpp" +# include "libbacktrace/posix.cpp" +# include "libbacktrace/sort.cpp" +# include "libbacktrace/state.cpp" +# if TRACY_HAS_CALLSTACK == 4 +# include "libbacktrace/macho.cpp" +# else +# include "libbacktrace/elf.cpp" +# endif +# include "common/TracyStackFrames.cpp" # endif -# include "common/TracyStackFrames.cpp" #endif #ifdef _MSC_VER diff --git a/extern/tracy/client/TracyArmCpuTable.hpp b/extern/tracy/client/TracyArmCpuTable.hpp index 2b445976..2b47c3a6 100644 --- a/extern/tracy/client/TracyArmCpuTable.hpp +++ b/extern/tracy/client/TracyArmCpuTable.hpp @@ -305,6 +305,14 @@ static const char* DecodeIosDevice( const char* id ) "iPhone14,4", "iPhone 13 Mini", "iPhone14,5", "iPhone 13", "iPhone14,6", "iPhone SE 
3rd Gen", + "iPhone14,7", "iPhone 14", + "iPhone14,8", "iPhone 14 Plus", + "iPhone15,2", "iPhone 14 Pro", + "iPhone15,3", "iPhone 14 Pro Max", + "iPhone15,4", "iPhone 15", + "iPhone15,5", "iPhone 15 Plus", + "iPhone16,1", "iPhone 15 Pro", + "iPhone16,2", "iPhone 15 Pro Max", "iPad1,1", "iPad (A1219/A1337)", "iPad2,1", "iPad 2 (A1395)", "iPad2,2", "iPad 2 (A1396)", @@ -365,6 +373,8 @@ static const char* DecodeIosDevice( const char* id ) "iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)", "iPad11,6", "iPad 8th gen (WiFi)", "iPad11,7", "iPad 8th gen (WiFi+Cellular)", + "iPad12,1", "iPad 9th Gen (WiFi)", + "iPad12,2", "iPad 9th Gen (WiFi+Cellular)", "iPad13,1", "iPad Air 4th gen (WiFi)", "iPad13,2", "iPad Air 4th gen (WiFi+Cellular)", "iPad13,4", "iPad Pro 11\" 3rd gen", @@ -377,6 +387,14 @@ static const char* DecodeIosDevice( const char* id ) "iPad13,11", "iPad Pro 12.9\" 5th gen", "iPad13,16", "iPad Air 5th Gen (WiFi)", "iPad13,17", "iPad Air 5th Gen (WiFi+Cellular)", + "iPad13,18", "iPad 10th Gen", + "iPad13,19", "iPad 10th Gen", + "iPad14,1", "iPad mini 6th Gen (WiFi)", + "iPad14,2", "iPad mini 6th Gen (WiFi+Cellular)", + "iPad14,3", "iPad Pro 11\" 4th Gen", + "iPad14,4", "iPad Pro 11\" 4th Gen", + "iPad14,5", "iPad Pro 12.9\" 6th Gen", + "iPad14,6", "iPad Pro 12.9\" 6th Gen", "iPod1,1", "iPod Touch", "iPod2,1", "iPod Touch 2nd gen", "iPod3,1", "iPod Touch 3rd gen", diff --git a/extern/tracy/client/TracyCallstack.cpp b/extern/tracy/client/TracyCallstack.cpp index 0de7c9d2..946a1972 100644 --- a/extern/tracy/client/TracyCallstack.cpp +++ b/extern/tracy/client/TracyCallstack.cpp @@ -3,10 +3,12 @@ #include #include #include "TracyCallstack.hpp" +#include "TracyDebug.hpp" #include "TracyFastVector.hpp" #include "TracyStringHelpers.hpp" #include "../common/TracyAlloc.hpp" -#include "TracyDebug.hpp" +#include "../common/TracySystem.hpp" + #ifdef TRACY_HAS_CALLSTACK @@ -31,7 +33,6 @@ # include # include # include -# include "TracyFastVector.hpp" #elif TRACY_HAS_CALLSTACK 
== 5 # include # include @@ -66,7 +67,7 @@ extern "C" extern "C" const char* ___tracy_demangle( const char* mangled ); #ifndef TRACY_DEMANGLE -constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; +constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; char* ___tracy_demangle_buffer; void ___tracy_init_demangle_buffer() @@ -90,9 +91,177 @@ extern "C" const char* ___tracy_demangle( const char* mangled ) #endif #endif +#if TRACY_HAS_CALLSTACK == 3 +# define TRACY_USE_IMAGE_CACHE +# include +#endif + namespace tracy { +#ifdef TRACY_USE_IMAGE_CACHE +// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths +// so we can quickly determine which image an address falls into. +// We refresh this cache only when we hit an address that doesn't fall into any known range. +class ImageCache +{ +public: + struct ImageEntry + { + void* m_startAddress = nullptr; + void* m_endAddress = nullptr; + char* m_name = nullptr; + }; + + ImageCache() + : m_images( 512 ) + { + Refresh(); + } + + ~ImageCache() + { + Clear(); + } + + const ImageEntry* GetImageForAddress( void* address ) + { + const ImageEntry* entry = GetImageForAddressImpl( address ); + if( !entry ) + { + Refresh(); + return GetImageForAddressImpl( address ); + } + return entry; + } + +private: + tracy::FastVector m_images; + bool m_updated = false; + bool m_haveMainImageName = false; + + static int Callback( struct dl_phdr_info* info, size_t size, void* data ) + { + ImageCache* cache = reinterpret_cast( data ); + + const auto startAddress = reinterpret_cast( info->dlpi_addr ); + if( cache->Contains( startAddress ) ) return 0; + + const uint32_t headerCount = info->dlpi_phnum; + assert( headerCount > 0); + const auto endAddress = reinterpret_cast( info->dlpi_addr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz); + + ImageEntry* image = cache->m_images.push_next(); + image->m_startAddress = startAddress; + image->m_endAddress = 
endAddress; + + // the base executable name isn't provided when iterating with dl_iterate_phdr, + // we will have to patch the executable image name outside this callback + if( info->dlpi_name && info->dlpi_name[0] != '\0' ) + { + size_t sz = strlen( info->dlpi_name ) + 1; + image->m_name = (char*)tracy_malloc( sz ); + memcpy( image->m_name, info->dlpi_name, sz ); + } + else + { + image->m_name = nullptr; + } + + cache->m_updated = true; + + return 0; + } + + bool Contains( void* startAddress ) const + { + return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } ); + } + + void Refresh() + { + m_updated = false; + dl_iterate_phdr( Callback, this ); + + if( m_updated ) + { + std::sort( m_images.begin(), m_images.end(), + []( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } ); + + // patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks + UpdateMainImageName(); + } + } + + void UpdateMainImageName() + { + if( m_haveMainImageName ) + { + return; + } + + for( ImageEntry& entry : m_images ) + { + if( entry.m_name == nullptr ) + { + Dl_info dlInfo; + if( dladdr( (void *)entry.m_startAddress, &dlInfo ) ) + { + if( dlInfo.dli_fname ) + { + size_t sz = strlen( dlInfo.dli_fname ) + 1; + entry.m_name = (char*)tracy_malloc( sz ); + memcpy( entry.m_name, dlInfo.dli_fname, sz ); + } + } + + // we only expect one entry to be null for the main executable entry + break; + } + } + + m_haveMainImageName = true; + } + + const ImageEntry* GetImageForAddressImpl( void* address ) const + { + auto it = std::lower_bound( m_images.begin(), m_images.end(), address, + []( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } ); + + if( it != m_images.end() && address < it->m_endAddress ) + { + return it; + } + return nullptr; + } + + void Clear() + { + for( 
ImageEntry& entry : m_images ) + { + tracy_free( entry.m_name ); + } + + m_images.clear(); + m_haveMainImageName = false; + } +}; +#endif //#ifdef TRACY_USE_IMAGE_CACHE + +// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime, +// simply resolve the offset and image name (which will be enough the resolving to be done offline) +#ifdef TRACY_SYMBOL_OFFLINE_RESOLVE +constexpr bool s_shouldResolveSymbolsOffline = true; +#else +static bool s_shouldResolveSymbolsOffline = false; +bool ShouldResolveSymbolsOffline() +{ + const char* symbolOfflineResolve = GetEnvVar( "TRACY_SYMBOL_OFFLINE_RESOLVE" ); + return (symbolOfflineResolve && symbolOfflineResolve[0] == '1'); +} +#endif // #ifdef TRACY_SYMBOL_OFFLINE_RESOLVE + #if TRACY_HAS_CALLSTACK == 1 enum { MaxCbTrace = 64 }; @@ -108,13 +277,13 @@ extern "C" typedef BOOL (__stdcall *t_SymFromInlineContext)( HANDLE hProcess, DWORD64 Address, ULONG InlineContext, PDWORD64 Displacement, PSYMBOL_INFO Symbol ); typedef BOOL (__stdcall *t_SymGetLineFromInlineContext)( HANDLE hProcess, DWORD64 qwAddr, ULONG InlineContext, DWORD64 qwModuleBaseAddress, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64 ); - TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0; t_SymAddrIncludeInlineTrace _SymAddrIncludeInlineTrace = 0; t_SymQueryInlineTrace _SymQueryInlineTrace = 0; t_SymFromInlineContext _SymFromInlineContext = 0; t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0; -} + TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0; +} struct ModuleCache { @@ -136,18 +305,19 @@ struct KernelDriver KernelDriver* s_krnlCache = nullptr; size_t s_krnlCacheCnt; - void InitCallstackCritical() { ___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); } -void InitCallstack() +void DbgHelpInit() { - _SymAddrIncludeInlineTrace = (t_SymAddrIncludeInlineTrace)GetProcAddress( GetModuleHandleA( 
"dbghelp.dll" ), "SymAddrIncludeInlineTrace" ); - _SymQueryInlineTrace = (t_SymQueryInlineTrace)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymQueryInlineTrace" ); - _SymFromInlineContext = (t_SymFromInlineContext)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymFromInlineContext" ); - _SymGetLineFromInlineContext = (t_SymGetLineFromInlineContext)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymGetLineFromInlineContext" ); + if( s_shouldResolveSymbolsOffline ) return; + + _SymAddrIncludeInlineTrace = (t_SymAddrIncludeInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymAddrIncludeInlineTrace"); + _SymQueryInlineTrace = (t_SymQueryInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymQueryInlineTrace"); + _SymFromInlineContext = (t_SymFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymFromInlineContext"); + _SymGetLineFromInlineContext = (t_SymGetLineFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymGetLineFromInlineContext"); #ifdef TRACY_DBGHELP_LOCK DBGHELP_INIT; @@ -157,9 +327,78 @@ void InitCallstack() SymInitialize( GetCurrentProcess(), nullptr, true ); SymSetOptions( SYMOPT_LOAD_LINES ); +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_UNLOCK; +#endif +} + +DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll, uint32_t bllSize ) +{ + if( s_shouldResolveSymbolsOffline ) return 0; + return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 ); +} + +ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize ) +{ + DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize ); + + ModuleCache* cachedModule = s_modCache->push_next(); + cachedModule->start = baseOfDll; + cachedModule->end = baseOfDll + dllSize; + + // when doing offline symbol resolution, we must store the full path of the dll for the resolving to work + if( s_shouldResolveSymbolsOffline ) + 
{ + cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1); + memcpy(cachedModule->name, imageName, imageNameLength); + cachedModule->name[imageNameLength] = '\0'; + } + else + { + auto ptr = imageName + imageNameLength; + while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--; + if (ptr > imageName) ptr++; + const auto namelen = imageName + imageNameLength - ptr; + cachedModule->name = (char*)tracy_malloc_fast(namelen + 3); + cachedModule->name[0] = '['; + memcpy(cachedModule->name + 1, ptr, namelen); + cachedModule->name[namelen + 1] = ']'; + cachedModule->name[namelen + 2] = '\0'; + } + + return cachedModule; +} + +void InitCallstack() +{ +#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE + s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline(); +#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE + if( s_shouldResolveSymbolsOffline ) + { + TracyDebug("TRACY: enabling offline symbol resolving!\n"); + } + + DbgHelpInit(); + +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_LOCK; +#endif + + // use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver + // and process module symbol loading at startup time - they will be loaded on demand later + // Sometimes this process can take a very long time and prevent resolving callstack frames + // symbols during that time. 
+ const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" ); + const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' ); + if ( !initTimeModuleLoad ) + { + TracyDebug("TRACY: skipping init time dbghelper module load\n"); + } + DWORD needed; LPVOID dev[4096]; - if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 ) + if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 ) { char windir[MAX_PATH]; if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 ); @@ -193,7 +432,7 @@ void InitCallstack() path = full; } - SymLoadModuleEx( GetCurrentProcess(), nullptr, path, nullptr, (DWORD64)dev[i], 0, nullptr, 0 ); + DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 ); const auto psz = strlen( path ); auto pptr = (char*)tracy_malloc_fast( psz+1 ); @@ -214,7 +453,7 @@ void InitCallstack() HANDLE proc = GetCurrentProcess(); HMODULE mod[1024]; - if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) + if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) { const auto sz = needed / sizeof( HMODULE ); for( size_t i=0; i 0 ) + const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); + if( nameLength > 0 ) { // This may be a new module loaded since our call to SymInitialize. // Just in case, force DbgHelp to load its pdb ! 
- SymLoadModuleEx(proc, NULL, name, NULL, (DWORD64)info.lpBaseOfDll, info.SizeOfImage, NULL, 0); - - auto ptr = name + res; - while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > name ) ptr++; - const auto namelen = name + res - ptr; - auto cache = s_modCache->push_next(); - cache->start = base; - cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc_fast( namelen+3 ); - cache->name[0] = '['; - memcpy( cache->name+1, ptr, namelen ); - cache->name[namelen+1] = ']'; - cache->name[namelen+2] = '\0'; + LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); } } } @@ -259,6 +484,8 @@ void EndCallstack() const char* DecodeCallstackPtrFast( uint64_t ptr ) { + if( s_shouldResolveSymbolsOffline ) return "[unresolved]"; + static char ret[MaxNameSize]; const auto proc = GetCurrentProcess(); @@ -294,7 +521,13 @@ const char* GetKernelModulePath( uint64_t addr ) return it->path; } -static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) +struct ModuleNameAndBaseAddress +{ + const char* name; + uint64_t baseAddr; +}; + +ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr ) { if( ( addr >> 63 ) != 0 ) { @@ -303,17 +536,17 @@ static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); if( it != s_krnlCache + s_krnlCacheCnt ) { - return it->mod; + return ModuleNameAndBaseAddress{ it->mod, it->addr }; } } - return ""; + return ModuleNameAndBaseAddress{ "", addr }; } for( auto& v : *s_modCache ) { if( addr >= v.start && addr < v.end ) { - return v.name; + return ModuleNameAndBaseAddress{ v.name, v.start }; } } @@ -334,35 +567,33 @@ static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) if( addr >= base && addr < base + info.SizeOfImage ) { char name[1024]; - const auto res = GetModuleFileNameA( mod[i], 
name, 1021 ); - if( res > 0 ) + const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); + if( nameLength > 0 ) { // since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize) - SymLoadModuleEx(proc, NULL, name, NULL, (DWORD64)info.lpBaseOfDll, info.SizeOfImage, NULL, 0); - auto ptr = name + res; - while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > name ) ptr++; - const auto namelen = name + res - ptr; - auto cache = s_modCache->push_next(); - cache->start = base; - cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc_fast( namelen+3 ); - cache->name[0] = '['; - memcpy( cache->name+1, ptr, namelen ); - cache->name[namelen+1] = ']'; - cache->name[namelen+2] = '\0'; - return cache->name; + ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); + return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start }; } } } } } - return "[unknown]"; + + return ModuleNameAndBaseAddress{ "[unknown]", 0x0 }; } CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { CallstackSymbolData sym; + + if( s_shouldResolveSymbolsOffline ) + { + sym.file = "[unknown]"; + sym.line = 0; + sym.needFree = false; + return sym; + } + IMAGEHLP_LINE64 line; DWORD displacement = 0; line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); @@ -390,15 +621,32 @@ CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) { - int write; - const auto proc = GetCurrentProcess(); +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_LOCK; +#endif + InitRpmalloc(); + const ModuleNameAndBaseAddress moduleNameAndAddress = GetModuleNameAndPrepareSymbols( ptr ); + + if( s_shouldResolveSymbolsOffline ) + { #ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; + DBGHELP_UNLOCK; #endif - const auto moduleName = GetModuleNameAndPrepareSymbols(ptr); + cb_data[0].symAddr = ptr - moduleNameAndAddress.baseAddr; + 
cb_data[0].symLen = 0; + + cb_data[0].name = CopyStringFast("[unresolved]"); + cb_data[0].file = CopyStringFast("[unknown]"); + cb_data[0].line = 0; + + return { cb_data, 1, moduleNameAndAddress.name }; + } + + int write; + const auto proc = GetCurrentProcess(); #if !defined TRACY_NO_CALLSTACK_INLINES BOOL doInline = FALSE; @@ -448,7 +696,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb_data[write].line = line.LineNumber; } - cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); cb_data[write].file = CopyStringFast( filename ); if( symValid ) { @@ -481,7 +729,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb.line = line.LineNumber; } - cb.name = symInlineValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb.name = symInlineValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); cb.file = CopyStringFast( filename ); if( symInlineValid ) { @@ -502,17 +750,21 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) DBGHELP_UNLOCK; #endif - return { cb_data, uint8_t( cb_num ), moduleName }; + return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name }; } #elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 enum { MaxCbTrace = 64 }; -struct backtrace_state* cb_bts; +struct backtrace_state* cb_bts = nullptr; + int cb_num; CallstackEntry cb_data[MaxCbTrace]; int cb_fixup; +#ifdef TRACY_USE_IMAGE_CACHE +static ImageCache* s_imageCache = nullptr; +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifdef TRACY_DEBUGINFOD debuginfod_client* s_debuginfod; @@ -525,13 +777,14 @@ struct DebugInfo int fd; }; -FastVector s_di_known( 16 ); +static FastVector* s_di_known; #endif #ifdef __linux struct KernelSymbol { uint64_t addr; + uint32_t size; const char* name; 
const char* mod; }; @@ -543,10 +796,11 @@ static void InitKernelSymbols() { FILE* f = fopen( "/proc/kallsyms", "rb" ); if( !f ) return; - tracy::FastVector tmpSym( 1024 ); + tracy::FastVector tmpSym( 512 * 1024 ); size_t linelen = 16 * 1024; // linelen must be big enough to prevent reallocs in getline() auto linebuf = (char*)tracy_malloc( linelen ); ssize_t sz; + size_t validCnt = 0; while( ( sz = getline( &linebuf, &linelen, f ) ) != -1 ) { auto ptr = linebuf; @@ -579,7 +833,7 @@ static void InitKernelSymbols() } if( addr == 0 ) continue; ptr++; - if( *ptr != 'T' && *ptr != 't' ) continue; + const bool valid = *ptr == 'T' || *ptr == 't'; ptr += 2; const auto namestart = ptr; while( *ptr != '\t' && *ptr != '\n' ) ptr++; @@ -594,20 +848,28 @@ static void InitKernelSymbols() modend = ptr; } - auto strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); - memcpy( strname, namestart, nameend - namestart ); - strname[nameend-namestart] = '\0'; - + char* strname = nullptr; char* strmod = nullptr; - if( modstart ) + + if( valid ) { - strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); - memcpy( strmod, modstart, modend - modstart ); - strmod[modend-modstart] = '\0'; + validCnt++; + + strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); + memcpy( strname, namestart, nameend - namestart ); + strname[nameend-namestart] = '\0'; + + if( modstart ) + { + strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); + memcpy( strmod, modstart, modend - modstart ); + strmod[modend-modstart] = '\0'; + } } auto sym = tmpSym.push_next(); sym->addr = addr; + sym->size = 0; sym->name = strname; sym->mod = strmod; } @@ -615,11 +877,22 @@ static void InitKernelSymbols() fclose( f ); if( tmpSym.empty() ) return; - std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr > rhs.addr; } ); - s_kernelSymCnt = tmpSym.size(); - s_kernelSym = (KernelSymbol*)tracy_malloc_fast( sizeof( KernelSymbol ) * s_kernelSymCnt 
); - memcpy( s_kernelSym, tmpSym.data(), sizeof( KernelSymbol ) * s_kernelSymCnt ); - TracyDebug( "Loaded %zu kernel symbols\n", s_kernelSymCnt ); + std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr < rhs.addr; } ); + for( size_t i=0; i*)tracy_malloc( sizeof( FastVector ) ); + new (s_di_known) FastVector( 16 ); #endif } @@ -725,11 +1018,11 @@ DebugInfo* FindDebugInfo( FastVector& vec, const uint8_t* buildid_dat int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename ) { auto buildid = (uint8_t*)buildid_data; - auto it = FindDebugInfo( s_di_known, buildid, buildid_size ); + auto it = FindDebugInfo( *s_di_known, buildid, buildid_size ); if( it ) return it->fd >= 0 ? dup( it->fd ) : -1; int fd = debuginfod_find_debuginfo( s_debuginfod, buildid, buildid_size, nullptr ); - it = s_di_known.push_next(); + it = s_di_known->push_next(); it->buildid_size = buildid_size; it->buildid = (uint8_t*)tracy_malloc( buildid_size ); memcpy( it->buildid, buildid, buildid_size ); @@ -744,7 +1037,7 @@ int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const const uint8_t* GetBuildIdForImage( const char* image, size_t& size ) { assert( image ); - for( auto& v : s_di_known ) + for( auto& v : *s_di_known ) { if( strcmp( image, v.filename ) == 0 ) { @@ -763,11 +1056,21 @@ debuginfod_client* GetDebuginfodClient() void EndCallstack() { +#ifdef TRACY_USE_IMAGE_CACHE + if( s_imageCache ) + { + s_imageCache->~ImageCache(); + tracy_free( s_imageCache ); + } +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifndef TRACY_DEMANGLE ___tracy_free_demangle_buffer(); #endif #ifdef TRACY_DEBUGINFOD - ClearDebugInfoVector( s_di_known ); + ClearDebugInfoVector( *s_di_known ); + s_di_known->~FastVector(); + tracy_free( s_di_known ); + debuginfod_end( s_debuginfod ); #endif } @@ -824,7 +1127,15 @@ static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum* 
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { CallstackSymbolData sym; - backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); + if( cb_bts ) + { + backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); + } + else + { + SymbolAddressErrorCb(&sym, nullptr, 0); + } + return sym; } @@ -927,33 +1238,67 @@ void SymInfoError( void* /*data*/, const char* /*msg*/, int /*errnum*/ ) cb_data[cb_num-1].symAddr = 0; } +void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, CallstackEntry& cbEntry) +{ + // tagged with a string that we can identify as an unresolved symbol + cbEntry.name = CopyStringFast( "[unresolved]" ); + // set .so relative offset so it can be resolved offline + cbEntry.symAddr = (uint64_t)address - imageBaseAddress; + cbEntry.symLen = 0x0; + cbEntry.file = CopyStringFast( "[unknown]" ); + cbEntry.line = 0; +} + CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) { InitRpmalloc(); if( ptr >> 63 == 0 ) { - cb_num = 0; - backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr ); - assert( cb_num > 0 ); - - backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); + const char* imageName = nullptr; + uint64_t imageBaseAddress = 0x0; - const char* symloc = nullptr; +#ifdef TRACY_USE_IMAGE_CACHE + const auto* image = s_imageCache->GetImageForAddress((void*)ptr); + if( image ) + { + imageName = image->m_name; + imageBaseAddress = uint64_t(image->m_startAddress); + } +#else Dl_info dlinfo; - if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname; + if( dladdr( (void*)ptr, &dlinfo ) ) + { + imageName = dlinfo.dli_fname; + imageBaseAddress = uint64_t( dlinfo.dli_fbase ); + } +#endif + + if( s_shouldResolveSymbolsOffline ) + { + cb_num = 1; + GetSymbolForOfflineResolve( (void*)ptr, imageBaseAddress, cb_data[0] ); + } + else + { + cb_num = 0; + backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr ); + assert( cb_num > 0 ); 
+ + backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); + } - return { cb_data, uint8_t( cb_num ), symloc ? symloc : "[unknown]" }; + return { cb_data, uint8_t( cb_num ), imageName ? imageName : "[unknown]" }; } #ifdef __linux else if( s_kernelSym ) { - auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); + auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr + lhs.size < rhs; } ); if( it != s_kernelSym + s_kernelSymCnt ) { cb_data[0].name = CopyStringFast( it->name ); cb_data[0].file = CopyStringFast( "" ); cb_data[0].line = 0; - cb_data[0].symLen = 0; + cb_data[0].symLen = it->size; cb_data[0].symAddr = it->addr; return { cb_data, 1, it->mod ? it->mod : "" }; } diff --git a/extern/tracy/client/TracyCallstack.hpp b/extern/tracy/client/TracyCallstack.hpp index 0b522b73..fdc9345d 100644 --- a/extern/tracy/client/TracyCallstack.hpp +++ b/extern/tracy/client/TracyCallstack.hpp @@ -5,22 +5,27 @@ #include "../common/TracyForceInline.hpp" #include "TracyCallstack.h" -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 -# include -#elif TRACY_HAS_CALLSTACK >= 3 -# include -#endif - - #ifndef TRACY_HAS_CALLSTACK namespace tracy { -static tracy_force_inline void* Callstack( int depth ) { return nullptr; } +static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; } } #else +#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 +# include +#elif TRACY_HAS_CALLSTACK >= 3 +# ifdef TRACY_LIBUNWIND_BACKTRACE + // libunwind is, in general, significantly faster than execinfo based backtraces +# define UNW_LOCAL_ONLY +# include +# else +# include +# endif +#endif + #ifdef TRACY_DEBUGINFOD # include #endif @@ -127,7 +132,13 @@ static tracy_force_inline void* Callstack( int depth ) assert( depth >= 1 ); auto trace = (uintptr_t*)tracy_malloc( ( 
1 + (size_t)depth ) * sizeof( uintptr_t ) ); + +#ifdef TRACY_LIBUNWIND_BACKTRACE + size_t num = unw_backtrace( (void**)(trace+1), depth ); +#else const auto num = (size_t)backtrace( (void**)(trace+1), depth ); +#endif + *trace = num; return trace; diff --git a/extern/tracy/client/TracyKCore.cpp b/extern/tracy/client/TracyKCore.cpp new file mode 100644 index 00000000..09d51d11 --- /dev/null +++ b/extern/tracy/client/TracyKCore.cpp @@ -0,0 +1,121 @@ +#ifdef __linux__ + +#include +#include +#include +#include +#include + +#include "TracyDebug.hpp" +#include "TracyKCore.hpp" +#include "../common/TracyAlloc.hpp" + +#if !defined(__GLIBC__) && !defined(__WORDSIZE) +// include __WORDSIZE headers for musl +# include +#endif + +namespace tracy +{ + +using elf_half = uint16_t; +using elf_word = uint32_t; +using elf_sword = int32_t; + +#if __WORDSIZE == 32 + using elf_addr = uint32_t; + using elf_off = uint32_t; + using elf_xword = uint32_t; +#else + using elf_addr = uint64_t; + using elf_off = uint64_t; + using elf_xword = uint64_t; +#endif + +struct elf_ehdr +{ + unsigned char e_ident[16]; + elf_half e_type; + elf_half e_machine; + elf_word e_version; + elf_addr e_entry; + elf_off e_phoff; + elf_off e_shoff; + elf_word e_flags; + elf_half e_ehsize; + elf_half e_phentsize; + elf_half e_phnum; + elf_half e_shentsize; + elf_half e_shnum; + elf_half e_shstrndx; +}; + +struct elf_phdr +{ + elf_word p_type; + elf_word p_flags; + elf_off p_offset; + elf_addr p_vaddr; + elf_addr p_paddr; + elf_xword p_filesz; + elf_xword p_memsz; + uint64_t p_align; // include 32-bit-only flags field for 32-bit compatibility +}; + +KCore::KCore() + : m_offsets( 16 ) +{ + m_fd = open( "/proc/kcore", O_RDONLY ); + if( m_fd == -1 ) return; + + elf_ehdr ehdr; + if( read( m_fd, &ehdr, sizeof( ehdr ) ) != sizeof( ehdr ) ) goto err; + + assert( ehdr.e_phentsize == sizeof( elf_phdr ) ); + + for( elf_half i=0; istart = phdr.p_vaddr; + ptr->size = phdr.p_memsz; + ptr->offset = phdr.p_offset; + } + + 
std::sort( m_offsets.begin(), m_offsets.end(), []( const Offset& lhs, const Offset& rhs ) { return lhs.start < rhs.start; } ); + TracyDebug( "KCore: %zu segments found\n", m_offsets.size() ); + return; + +err: + close( m_fd ); + m_fd = -1; +} + +KCore::~KCore() +{ + if( m_fd != -1 ) close( m_fd ); +} + +void* KCore::Retrieve( uint64_t addr, uint64_t size ) const +{ + if( m_fd == -1 ) return nullptr; + auto it = std::lower_bound( m_offsets.begin(), m_offsets.end(), addr, []( const Offset& lhs, uint64_t rhs ) { return lhs.start + lhs.size < rhs; } ); + if( it == m_offsets.end() ) return nullptr; + if( addr + size > it->start + it->size ) return nullptr; + if( lseek( m_fd, it->offset + addr - it->start, SEEK_SET ) == -1 ) return nullptr; + auto ptr = tracy_malloc( size ); + if( read( m_fd, ptr, size ) != ssize_t( size ) ) + { + tracy_free( ptr ); + return nullptr; + } + return ptr; +} + +} + +#endif \ No newline at end of file diff --git a/extern/tracy/client/TracyKCore.hpp b/extern/tracy/client/TracyKCore.hpp new file mode 100644 index 00000000..437e172c --- /dev/null +++ b/extern/tracy/client/TracyKCore.hpp @@ -0,0 +1,37 @@ +#ifndef __TRACYKCORE_HPP__ +#define __TRACYKCORE_HPP__ + +#ifdef __linux__ + +#include + +#include "TracyFastVector.hpp" + +namespace tracy +{ + +class KCore +{ + struct Offset + { + uint64_t start; + uint64_t size; + uint64_t offset; + }; + +public: + KCore(); + ~KCore(); + + void* Retrieve( uint64_t addr, uint64_t size ) const; + +private: + int m_fd; + FastVector m_offsets; +}; + +} + +#endif + +#endif diff --git a/extern/tracy/client/TracyProfiler.cpp b/extern/tracy/client/TracyProfiler.cpp index ed580123..fa930432 100644 --- a/extern/tracy/client/TracyProfiler.cpp +++ b/extern/tracy/client/TracyProfiler.cpp @@ -45,6 +45,14 @@ # include #endif +#ifdef __QNX__ +# include +# include +# include +# include +# include +#endif + #include #include #include @@ -115,6 +123,10 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( 
LOGICAL_PR # include #endif +#ifdef __QNX__ +extern char* __progname; +#endif + namespace tracy { @@ -157,7 +169,11 @@ static std::vector ParseMappings() { uintptr_t start_addr; uintptr_t end_addr; +#if defined(__LP64__) if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; +#else + if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; +#endif char* first_space = strchr( line, ' ' ); if( !first_space ) continue; char* perm = first_space + 1; @@ -255,8 +271,19 @@ static bool EnsureReadable( uintptr_t address ) MappingInfo* mapping = LookUpMapping(address); return mapping && EnsureReadable( *mapping ); } - -#endif // defined __ANDROID__ +#elif defined WIN32 +static bool EnsureReadable( uintptr_t address ) +{ + MEMORY_BASIC_INFORMATION memInfo; + VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); + return memInfo.Protect != PAGE_NOACCESS; +} +#else +static bool EnsureReadable( uintptr_t address ) +{ + return true; +} +#endif #ifndef TRACY_DELAYED_INIT @@ -281,7 +308,7 @@ struct ThreadHandleWrapper static inline void CpuId( uint32_t* regs, uint32_t leaf ) { memset(regs, 0, sizeof(uint32_t) * 4); -#if defined _WIN32 +#if defined _MSC_VER __cpuidex( (int*)regs, leaf, 0 ); #else __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); @@ -400,6 +427,8 @@ static const char* GetProcessName() #elif defined __APPLE__ || defined BSD auto buf = getprogname(); if( buf ) processName = buf; +#elif defined __QNX__ + processName = __progname; #endif return processName; } @@ -437,6 +466,10 @@ static const char* GetProcessExecutablePath() static char buf[1024]; readlink( "/proc/curproc/exe", buf, 1024 ); return buf; +#elif defined __QNX__ + static char buf[_PC_PATH_MAX + 1]; + _cmdname(buf); + return buf; #else return nullptr; #endif @@ -488,7 +521,7 @@ static const char* GetHostInfo() # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); 
# else - ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); # endif } #elif defined __linux__ @@ -515,6 +548,8 @@ static const char* GetHostInfo() ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); #elif defined __OpenBSD__ ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); +#elif defined __QNX__ + ptr += sprintf( ptr, "OS: QNX\n" ); #else ptr += sprintf( ptr, "OS: unknown\n" ); #endif @@ -687,6 +722,21 @@ static const char* GetHostInfo() size_t sz = sizeof( memSize ); sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); +#elif defined __QNX__ + struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); + size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); + char *strings = SYSPAGE_ENTRY(strings)->data; + + uint64_t memSize = 0; + size_t i; + for (i = 0; i < count; i++) { + struct asinfo_entry *entry = &entries[i]; + if (strcmp(strings + entry->name, "ram") == 0) { + memSize += entry->end - entry->start + 1; + } + } + memSize = memSize / 1024 / 1024; + ptr += sprintf( ptr, "RAM: %llu MB\n", memSize); #else ptr += sprintf( ptr, "RAM: unknown\n" ); #endif @@ -1142,12 +1192,14 @@ thread_local bool RpThreadShutdown = false; # ifdef TRACY_MANUAL_LIFETIME ProfilerData* s_profilerData = nullptr; static ProfilerThreadData& GetProfilerThreadData(); +static std::atomic s_isProfilerStarted { false }; TRACY_API void StartupProfiler() { s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (s_profilerData) ProfilerData(); s_profilerData->profiler.SpawnWorkerThreads(); GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); + s_isProfilerStarted.store( true, std::memory_order_seq_cst ); } static ProfilerData& GetProfilerData() { @@ -1156,6 +1208,7 @@ static ProfilerData& GetProfilerData() } TRACY_API void 
ShutdownProfiler() { + s_isProfilerStarted.store( false, std::memory_order_seq_cst ); s_profilerData->~ProfilerData(); tracy_free( s_profilerData ); s_profilerData = nullptr; @@ -1163,6 +1216,10 @@ TRACY_API void ShutdownProfiler() RpThreadInitDone = false; RpInitDone.store( 0, std::memory_order_release ); } +TRACY_API bool IsProfilerStarted() +{ + return s_isProfilerStarted.load( std::memory_order_seq_cst ); +} # else static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; @@ -1375,6 +1432,11 @@ Profiler::Profiler() CalibrateDelay(); ReportTopology(); +#ifdef __linux__ + m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); + new(m_kcore) KCore(); +#endif + #ifndef TRACY_NO_EXIT const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); if( noExitEnv && noExitEnv[0] == '1' ) @@ -1394,37 +1456,8 @@ Profiler::Profiler() #endif } -void Profiler::SpawnWorkerThreads() +void Profiler::InstallCrashHandler() { -#ifdef TRACY_HAS_SYSTEM_TRACING - if( SysTraceStart( m_samplingPeriod ) ) - { - s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } -#endif - - s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_thread) Thread( LaunchWorker, this ); - -#ifndef TRACY_NO_FRAME_IMAGE - s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_compressThread) Thread( LaunchCompressWorker, this ); -#endif - -#ifdef TRACY_HAS_CALLSTACK - s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_symbolThread) Thread( LaunchSymbolWorker, this ); -#endif - -#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - s_profilerThreadId = GetThreadId( s_thread->Handle() ); -# ifdef TRACY_HAS_CALLSTACK - s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); -# endif - m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); -#endif #if defined __linux__ && !defined 
TRACY_NO_CRASH_HANDLER struct sigaction threadFreezer = {}; @@ -1442,21 +1475,18 @@ void Profiler::SpawnWorkerThreads() sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); #endif -#ifndef TRACY_NO_CRASH_HANDLER - m_crashHandlerInstalled = true; +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); #endif -#ifdef TRACY_HAS_CALLSTACK - InitCallstackCritical(); +#ifndef TRACY_NO_CRASH_HANDLER + m_crashHandlerInstalled = true; #endif - m_timeBegin.store( GetTime(), std::memory_order_relaxed ); } -Profiler::~Profiler() +void Profiler::RemoveCrashHandler() { - m_shutdown.store( true, std::memory_order_relaxed ); - #if defined _WIN32 && !defined TRACY_UWP if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); #endif @@ -1473,6 +1503,60 @@ Profiler::~Profiler() sigaction( SIGABRT, &m_prevSignal.abrt, nullptr ); } #endif + m_crashHandlerInstalled = false; +} + +void Profiler::SpawnWorkerThreads() +{ +#ifdef TRACY_HAS_SYSTEM_TRACING + // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) + // as it can have significant impact on the size of the traces + const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); + const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); + if( disableSystrace ) + { + TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); + } + else if( SysTraceStart( m_samplingPeriod ) ) + { + s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); + std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); + } +#endif + + s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_thread) Thread( LaunchWorker, this ); + +#ifndef TRACY_NO_FRAME_IMAGE + s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_compressThread) Thread( LaunchCompressWorker, this ); +#endif + 
+#ifdef TRACY_HAS_CALLSTACK + s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_symbolThread) Thread( LaunchSymbolWorker, this ); +#endif + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + s_profilerThreadId = GetThreadId( s_thread->Handle() ); +# ifdef TRACY_HAS_CALLSTACK + s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); +# endif +#endif + +#ifdef TRACY_HAS_CALLSTACK + InitCallstackCritical(); +#endif + + m_timeBegin.store( GetTime(), std::memory_order_relaxed ); +} + +Profiler::~Profiler() +{ + m_shutdown.store( true, std::memory_order_relaxed ); + + RemoveCrashHandler(); #ifdef TRACY_HAS_SYSTEM_TRACING if( s_sysTraceThread ) @@ -1500,6 +1584,11 @@ Profiler::~Profiler() EndCallstack(); #endif +#ifdef __linux__ + m_kcore->~KCore(); + tracy_free( m_kcore ); +#endif + tracy_free( m_lz4Buf ); tracy_free( m_buffer ); LZ4_freeStream( (LZ4_stream_t*)m_stream ); @@ -1677,6 +1766,12 @@ void Profiler::Worker() new(m_broadcast) UdpBroadcast(); # ifdef TRACY_ONLY_LOCALHOST const char* addr = "127.255.255.255"; +# elif defined TRACY_CLIENT_ADDRESS + const char* addr = TRACY_CLIENT_ADDRESS; +# elif defined __QNX__ + // global broadcast address of 255.255.255.255 is not well-supported by QNX, + // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" +# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. 
# else const char* addr = "255.255.255.255"; # endif @@ -1789,6 +1884,7 @@ void Profiler::Worker() m_connectionId.fetch_add( 1, std::memory_order_release ); #endif m_isConnected.store( true, std::memory_order_release ); + InstallCrashHandler(); HandshakeStatus handshake = HandshakeWelcome; m_sock->Send( &handshake, sizeof( handshake ) ); @@ -1891,6 +1987,8 @@ void Profiler::Worker() if( ShouldExit() ) break; m_isConnected.store( false, std::memory_order_release ); + RemoveCrashHandler(); + #ifdef TRACY_ON_DEMAND m_bufferOffset = 0; m_bufferStart = 0; @@ -3280,6 +3378,17 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) } } } +#elif defined __linux__ + void* data = m_kcore->Retrieve( si.ptr, si.extra ); + if( data ) + { + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } #endif TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); TracyLfqCommit; @@ -3365,7 +3474,22 @@ bool Profiler::HandleServerQuery() } else { - SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName ); + auto t = GetThreadNameData( (uint32_t)ptr ); + if( t ) + { + SendString( ptr, t->name, QueueType::ThreadName ); + if( t->groupHint != 0 ) + { + TracyLfqPrepare( QueueType::ThreadGroupHint ); + MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); + MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); + TracyLfqCommit; + } + } + else + { + SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); + } } break; case ServerQuerySourceLocation: @@ -3603,6 +3727,7 @@ void Profiler::ReportTopology() struct CpuData { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -3632,6 +3757,7 @@ void Profiler::ReportTopology() const uint32_t numcpus = sysinfo.dwNumberOfProcessors; auto cpuData = (CpuData*)tracy_malloc( sizeof( 
CpuData ) * numcpus ); + memset( cpuData, 0, sizeof( CpuData ) * numcpus ); for( uint32_t i=0; icpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); @@ -3725,6 +3852,7 @@ void Profiler::ReportTopology() TracyLfqPrepare( QueueType::CpuTopology ); MemWrite( &item->cpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); @@ -3815,15 +3943,12 @@ void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) } else { -#ifdef __ANDROID__ - // On Android it's common for code to be in mappings that are only executable - // but not readable. if( !EnsureReadable( symbol ) ) { AckSymbolCodeNotAvailable(); return; } -#endif + SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode ); } } @@ -3831,28 +3956,29 @@ void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) { bool ok = false; - struct stat st; - if( stat( data, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime ) + FILE* f = fopen( data, "rb" ); + if( f ) { - if( st.st_size < ( TargetFrameSize - 16 ) ) + struct stat st; + if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) { - FILE* f = fopen( data, "rb" ); - if( f ) + auto ptr = (char*)tracy_malloc_fast( st.st_size ); + auto rd = fread( ptr, 1, st.st_size, f ); + if( rd == (size_t)st.st_size ) { - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = fread( ptr, 1, st.st_size, f ); - fclose( f ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( 
&item->sourceCodeMetadata.id, id ); - TracyLfqCommit; - ok = true; - } + TracyLfqPrepare( QueueType::SourceCodeMetadata ); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); } } + fclose( f ); } #ifdef TRACY_DEBUGINFOD @@ -3882,6 +4008,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } close( d ); } @@ -3908,6 +4038,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } } @@ -4175,12 +4309,12 @@ TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t co TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); } -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const 
char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); } TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) @@ -4258,6 +4392,11 @@ TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data tracy::MemWrite( &item->gpuNewContext.context, data.context ); tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); tracy::MemWrite( &item->gpuNewContext.type, data.type ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + TracyLfqCommitC; } @@ -4270,6 +4409,11 @@ TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + TracyLfqCommitC; } @@ -4283,6 +4427,15 @@ TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibrat TracyLfqCommitC; } +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + TracyLfqCommitC; +} + TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) { auto item = tracy::Profiler::QueueSerial(); @@ -4390,13 +4543,192 @@ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_c tracy::Profiler::QueueSerialFinish(); } +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data 
) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +struct __tracy_lockable_context_data +{ + uint32_t m_id; +#ifdef TRACY_ON_DEMAND + std::atomic m_lockCount; + std::atomic m_active; +#endif +}; + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) +{ + struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); + lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); +#ifdef TRACY_ON_DEMAND + new(&lockdata->m_lockCount) std::atomic( 0 ); + new(&lockdata->m_active) std::atomic( false ); +#endif + assert( lockdata->m_id != (std::numeric_limits::max)() ); + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); + tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); + tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::Lockable ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + + return lockdata; +} + +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); + tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); + tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); +#ifdef TRACY_ON_DEMAND + 
tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.~atomic(); + lockdata->m_active.~atomic(); +#endif + tracy::tracy_free((void*)lockdata); +} + +TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return false; +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); + tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); + tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + return true; +} + +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); + if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; + if( !tracy::GetProfiler().IsConnected() ) + { + lockdata->m_active.store( false, 
std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); + tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); + tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ) +{ +#ifdef TRACY_ON_DEMAND + if( !acquired ) return; + + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return; +#endif + + if( acquired ) + { + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + } +} + +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) +{ +#ifdef TRACY_ON_DEMAND + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( !active ) return; + const auto connected = tracy::GetProfiler().IsConnected(); + if( !connected ) + { + if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); + tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); + 
tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); + tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) +{ + assert( nameSz < (std::numeric_limits::max)() ); + auto ptr = (char*)tracy::tracy_malloc( nameSz ); + memcpy( ptr, name, nameSz ); + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); + tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); + tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); +} + TRACY_API int ___tracy_connected( void ) { return tracy::GetProfiler().IsConnected(); } #ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); } +TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } #endif @@ -4410,6 +4742,11 @@ TRACY_API void ___tracy_shutdown_profiler( void ) { tracy::ShutdownProfiler(); } + +TRACY_API int ___tracy_profiler_started( void ) +{ + return tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ); +} # endif #ifdef __cplusplus diff --git a/extern/tracy/client/TracyProfiler.hpp b/extern/tracy/client/TracyProfiler.hpp index e3b256df..46f11f3d 100644 --- a/extern/tracy/client/TracyProfiler.hpp +++ b/extern/tracy/client/TracyProfiler.hpp @@ -10,6 +10,7 @@ #include "tracy_concurrentqueue.h" #include "tracy_SPSCQueue.h" #include "TracyCallstack.hpp" +#include "TracyKCore.hpp" #include "TracySysPower.hpp" #include "TracySysTime.hpp" #include "TracyFastVector.hpp" @@ -27,7 +28,7 @@ # include #endif -#if ( defined _WIN32 || 
( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) +#if ( (defined _WIN32 && !(defined _M_ARM64 || defined _M_ARM)) || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) # define TRACY_HW_TIMER #endif @@ -51,6 +52,10 @@ namespace tracy #if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME) TRACY_API void StartupProfiler(); TRACY_API void ShutdownProfiler(); +TRACY_API bool IsProfilerStarted(); +# define TracyIsStarted tracy::IsProfilerStarted() +#else +# define TracyIsStarted true #endif class GpuCtx; @@ -601,8 +606,7 @@ class Profiler profiler.m_serialLock.unlock(); #else static_cast(depth); // unused - static_cast(name); // unused - MemAlloc( ptr, size, secure ); + MemAllocNamed( ptr, size, secure, name ); #endif } @@ -625,8 +629,7 @@ class Profiler profiler.m_serialLock.unlock(); #else static_cast(depth); // unused - static_cast(name); // unused - MemFree( ptr, secure ); + MemFreeNamed( ptr, secure, name ); #endif } @@ -672,11 +675,12 @@ class Profiler } #ifdef TRACY_FIBERS - static tracy_force_inline void EnterFiber( const char* fiber ) + static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint ) { TracyQueuePrepare( QueueType::FiberEnter ); MemWrite( &item->fiberEnter.time, GetTime() ); MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber ); + MemWrite( &item->fiberEnter.groupHint, groupHint ); TracyQueueCommit( fiberEnter ); } @@ -741,29 +745,29 @@ class Profiler // 1b null terminator // nsz zone name (optional) - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, function, nullptr, 0 ); + return AllocSourceLocation( line, source, 
function, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz ); + return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); + return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color = 0 ) { const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); assert( sz32 <= (std::numeric_limits::max)() ); const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); memcpy( ptr, &sz, 2 ); - memset( ptr + 2, 0, 4 ); + memcpy( ptr + 2, &color, 4 ); memcpy( ptr + 6, &line, 4 ); memcpy( ptr + 10, function, functionSz ); ptr[10 + functionSz] = '\0'; @@ -794,6 +798,9 @@ class Profiler void HandleSymbolQueueItem( const SymbolQueueItem& si ); #endif + void InstallCrashHandler(); + void 
RemoveCrashHandler(); + void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); @@ -990,6 +997,7 @@ class Profiler struct { struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt; } m_prevSignal; + KCore* m_kcore; #endif bool m_crashHandlerInstalled; diff --git a/extern/tracy/client/TracyScoped.hpp b/extern/tracy/client/TracyScoped.hpp index d2274e40..8e81c998 100644 --- a/extern/tracy/client/TracyScoped.hpp +++ b/extern/tracy/client/TracyScoped.hpp @@ -2,6 +2,7 @@ #define __TRACYSCOPED_HPP__ #include +#include #include #include @@ -57,7 +58,7 @@ class ScopedZone TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -69,13 +70,15 @@ class ScopedZone m_connectionId = GetProfiler().ConnectionId(); #endif TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t 
functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast(0), is_active ) {} + + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -89,12 +92,14 @@ class ScopedZone GetProfiler().SendCallstack( depth ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyQueueCommit( zoneBeginThread ); } + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} + tracy_force_inline ~ScopedZone() { if( !m_active ) return; @@ -121,6 +126,30 @@ class ScopedZone TracyQueueCommit( zoneTextFatThread ); } + void TextFmt( const char* fmt, ... 
) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneText ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Name( const char* txt, size_t size ) { assert( size < (std::numeric_limits::max)() ); @@ -136,6 +165,30 @@ class ScopedZone TracyQueueCommit( zoneTextFatThread ); } + void NameFmt( const char* fmt, ... ) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneName ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Color( uint32_t color ) { if( !m_active ) return; diff --git a/extern/tracy/client/TracySysTrace.cpp b/extern/tracy/client/TracySysTrace.cpp index af0641fe..0fd1d0ac 100644 --- a/extern/tracy/client/TracySysTrace.cpp +++ b/extern/tracy/client/TracySysTrace.cpp @@ -16,16 +16,25 @@ namespace tracy { -static constexpr int GetSamplingFrequency() +static int GetSamplingFrequency() { + int samplingHz = TRACY_SAMPLING_HZ; 
+ + auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); + if( env ) + { + int val = atoi( env ); + if( val > 0 ) samplingHz = val; + } + #if defined _WIN32 - return TRACY_SAMPLING_HZ > 8000 ? 8000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); #else - return TRACY_SAMPLING_HZ > 1000000 ? 1000000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 1 : samplingHz ); #endif } -static constexpr int GetSamplingPeriod() +static int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } @@ -321,7 +330,7 @@ static void SetupVsync() #endif } -static constexpr int GetSamplingInterval() +static int GetSamplingInterval() { return GetSamplingPeriod() / 100; } diff --git a/extern/tracy/client/tracy_rpmalloc.cpp b/extern/tracy/client/tracy_rpmalloc.cpp index 711505d2..4a0d0b40 100644 --- a/extern/tracy/client/tracy_rpmalloc.cpp +++ b/extern/tracy/client/tracy_rpmalloc.cpp @@ -781,7 +781,7 @@ rpmalloc_set_main_thread(void) { static void _rpmalloc_spin(void) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !(defined(_M_ARM) || defined(_M_ARM64)) _mm_pause(); #elif defined(__x86_64__) || defined(__i386__) __asm__ volatile("pause" ::: "memory"); @@ -793,8 +793,7 @@ _rpmalloc_spin(void) { #elif defined(__sparc__) __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); #else - struct timespec ts = {0}; - nanosleep(&ts, 0); + std::this_thread::yield(); #endif } diff --git a/extern/tracy/common/TracyProtocol.hpp b/extern/tracy/common/TracyProtocol.hpp index 5eb1639d..54124586 100644 --- a/extern/tracy/common/TracyProtocol.hpp +++ b/extern/tracy/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 64 }; +enum : uint32_t { ProtocolVersion = 69 }; enum : uint16_t { BroadcastVersion = 3 }; using 
lz4sz_t = uint32_t; @@ -47,10 +47,10 @@ enum ServerQuery : uint8_t ServerQueryFrameName, ServerQueryParameter, ServerQueryFiberName, + ServerQueryExternalName, // Items above are high priority. Split order must be preserved. See IsQueryPrio(). ServerQueryDisconnect, ServerQueryCallstackFrame, - ServerQueryExternalName, ServerQuerySymbol, ServerQuerySymbolCode, ServerQuerySourceCode, diff --git a/extern/tracy/common/TracyQueue.hpp b/extern/tracy/common/TracyQueue.hpp index 051d412a..affbd67a 100644 --- a/extern/tracy/common/TracyQueue.hpp +++ b/extern/tracy/common/TracyQueue.hpp @@ -70,6 +70,7 @@ enum class QueueType : uint8_t KeepAlive, ThreadContext, GpuCalibration, + GpuTimeSync, Crash, CrashReport, ZoneValidation, @@ -107,6 +108,7 @@ enum class QueueType : uint8_t SingleStringData, SecondStringData, MemNamePayload, + ThreadGroupHint, StringData, ThreadName, PlotName, @@ -258,6 +260,7 @@ struct QueueFiberEnter int64_t time; uint64_t fiber; // ptr uint32_t thread; + int32_t groupHint; }; struct QueueFiberLeave @@ -453,6 +456,13 @@ struct QueueGpuCalibration uint8_t context; }; +struct QueueGpuTimeSync +{ + int64_t gpuTime; + int64_t cpuTime; + uint8_t context; +}; + struct QueueGpuContextName { uint8_t context; @@ -469,6 +479,12 @@ struct QueueMemNamePayload uint64_t name; }; +struct QueueThreadGroupHint +{ + uint32_t thread; + int32_t groupHint; +}; + struct QueueMemAlloc { int64_t time; @@ -631,6 +647,7 @@ struct QueueSourceCodeNotAvailable struct QueueCpuTopology { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -718,11 +735,13 @@ struct QueueItem QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; QueueGpuCalibration gpuCalibration; + QueueGpuTimeSync gpuTimeSync; QueueGpuContextName gpuContextName; QueueGpuContextNameFat gpuContextNameFat; QueueMemAlloc memAlloc; QueueMemFree memFree; QueueMemNamePayload memName; + QueueThreadGroupHint threadGroupHint; QueueCallstackFat callstackFat; QueueCallstackFatThread callstackFatThread; 
QueueCallstackAllocFat callstackAllocFat; @@ -821,6 +840,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), + sizeof( QueueHeader ) + sizeof( QueueGpuTimeSync ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), @@ -858,6 +878,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // single string data sizeof( QueueHeader ), // second string data sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), + sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ), // keep all QueueStringTransfer below sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name diff --git a/extern/tracy/common/TracySocket.cpp b/extern/tracy/common/TracySocket.cpp index 25967898..bdba3619 100644 --- a/extern/tracy/common/TracySocket.cpp +++ b/extern/tracy/common/TracySocket.cpp @@ -21,6 +21,9 @@ # pragma warning(disable:4267) # endif # define poll WSAPoll +# ifdef _MSC_VER +# pragma comment(lib, "ws2_32.lib") +# endif #else # include # include diff --git a/extern/tracy/common/TracySystem.cpp b/extern/tracy/common/TracySystem.cpp index 9a477aa3..d51f5d65 100644 --- a/extern/tracy/common/TracySystem.cpp +++ b/extern/tracy/common/TracySystem.cpp @@ -28,6 +28,9 @@ # include #elif defined __NetBSD__ || defined __DragonFly__ # include +#elif defined __QNX__ +# include +# include #endif #ifdef __MINGW32__ @@ -78,6 +81,8 @@ TRACY_API uint32_t GetThreadHandleImpl() return lwp_gettid(); #elif defined __OpenBSD__ return getthrid(); +#elif defined __QNX__ + return (uint32_t) gettid(); #elif defined __EMSCRIPTEN__ // Not supported, but let it compile. 
return 0; @@ -96,16 +101,10 @@ TRACY_API uint32_t GetThreadHandleImpl() } #ifdef TRACY_ENABLE -struct ThreadNameData -{ - uint32_t id; - const char* name; - ThreadNameData* next; -}; std::atomic& GetThreadNameData(); #endif -#ifdef _MSC_VER +#if defined _MSC_VER && !defined __clang__ # pragma pack( push, 8 ) struct THREADNAME_INFO { @@ -129,6 +128,11 @@ void ThreadNameMsvcMagic( const THREADNAME_INFO& info ) #endif TRACY_API void SetThreadName( const char* name ) +{ + SetThreadNameWithHint( name, 0 ); +} + +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ) { #if defined _WIN32 # ifdef TRACY_UWP @@ -144,7 +148,7 @@ TRACY_API void SetThreadName( const char* name ) } else { -# if defined _MSC_VER +# if defined _MSC_VER && !defined __clang__ THREADNAME_INFO info; info.dwType = 0x1000; info.szName = name; @@ -176,6 +180,21 @@ TRACY_API void SetThreadName( const char* name ) #endif } } +#elif defined __QNX__ + { + const auto sz = strlen( name ); + if( sz <= _NTO_THREAD_NAME_MAX ) + { + pthread_setname_np( pthread_self(), name ); + } + else + { + char buf[_NTO_THREAD_NAME_MAX + 1]; + memcpy( buf, name, _NTO_THREAD_NAME_MAX ); + buf[_NTO_THREAD_NAME_MAX] = '\0'; + pthread_setname_np( pthread_self(), buf ); + } + }; #endif #ifdef TRACY_ENABLE { @@ -185,6 +204,7 @@ TRACY_API void SetThreadName( const char* name ) buf[sz] = '\0'; auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); data->id = detail::GetThreadHandleImpl(); + data->groupHint = groupHint; data->name = buf; data->next = GetThreadNameData().load( std::memory_order_relaxed ); while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {} @@ -192,6 +212,22 @@ TRACY_API void SetThreadName( const char* name ) #endif } +#ifdef TRACY_ENABLE +ThreadNameData* GetThreadNameData( uint32_t id ) +{ + auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); + while( ptr ) + { + if( ptr->id == id ) + { + 
return ptr; + } + ptr = ptr->next; + } + return nullptr; +} +#endif + TRACY_API const char* GetThreadName( uint32_t id ) { static char buf[256]; @@ -255,6 +291,11 @@ TRACY_API const char* GetThreadName( uint32_t id ) pthread_setcancelstate( cs, 0 ); # endif return buf; +#elif defined __QNX__ + static char qnxNameBuf[_NTO_THREAD_NAME_MAX + 1] = {0}; + if (pthread_getname_np(static_cast(id), qnxNameBuf, _NTO_THREAD_NAME_MAX) == 0) { + return qnxNameBuf; + }; #endif sprintf( buf, "%" PRIu32, id ); diff --git a/extern/tracy/common/TracySystem.hpp b/extern/tracy/common/TracySystem.hpp index e0040e95..2f565e9a 100644 --- a/extern/tracy/common/TracySystem.hpp +++ b/extern/tracy/common/TracySystem.hpp @@ -14,6 +14,16 @@ TRACY_API uint32_t GetThreadHandleImpl(); } #ifdef TRACY_ENABLE +struct ThreadNameData +{ + uint32_t id; + int32_t groupHint; + const char* name; + ThreadNameData* next; +}; + +ThreadNameData* GetThreadNameData( uint32_t id ); + TRACY_API uint32_t GetThreadHandle(); #else static inline uint32_t GetThreadHandle() @@ -23,9 +33,10 @@ static inline uint32_t GetThreadHandle() #endif TRACY_API void SetThreadName( const char* name ); +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ); TRACY_API const char* GetThreadName( uint32_t id ); -TRACY_API const char* GetEnvVar(const char* name); +TRACY_API const char* GetEnvVar( const char* name ); } diff --git a/extern/tracy/common/TracyVersion.hpp b/extern/tracy/common/TracyVersion.hpp index 2355279f..0905ef94 100644 --- a/extern/tracy/common/TracyVersion.hpp +++ b/extern/tracy/common/TracyVersion.hpp @@ -6,8 +6,8 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 10 }; -enum { Patch = 0 }; +enum { Minor = 11 }; +enum { Patch = 1 }; } } diff --git a/extern/tracy/common/tracy_lz4.cpp b/extern/tracy/common/tracy_lz4.cpp index 6c26639c..15d0990f 100644 --- a/extern/tracy/common/tracy_lz4.cpp +++ b/extern/tracy/common/tracy_lz4.cpp @@ -128,11 +128,11 @@ #endif /* _MSC_VER */ 
#ifndef LZ4_FORCE_INLINE -# ifdef _MSC_VER /* Visual Studio */ +# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ # define LZ4_FORCE_INLINE static __forceinline # else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ +# if defined (__GNUC__) || defined (__clang__) # define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) # else # define LZ4_FORCE_INLINE static inline diff --git a/extern/tracy/libbacktrace/dwarf.cpp b/extern/tracy/libbacktrace/dwarf.cpp index f3899cbc..b6d681aa 100644 --- a/extern/tracy/libbacktrace/dwarf.cpp +++ b/extern/tracy/libbacktrace/dwarf.cpp @@ -4251,6 +4251,19 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata, } } +bool dwarf_fileline_dwarf_lookup_pc_in_all_entries(struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, backtrace_error_callback error_callback, void *data, + int& found, int& ret) +{ + for (struct dwarf_data* ddata = (struct dwarf_data *)state->fileline_data; + ddata != NULL; + ddata = ddata->next) + { + ret = dwarf_lookup_pc(state, ddata, pc, callback, error_callback, data, &found); + if (ret != 0 || found) return true; + } + return false; +} /* Return the file/line information for a PC using the DWARF mapping we built earlier. 
*/ @@ -4262,20 +4275,30 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc, { struct dwarf_data *ddata; int found; - int ret; + int ret = 0; if (!state->threaded) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) { - for (ddata = (struct dwarf_data *) state->fileline_data; - ddata != NULL; - ddata = ddata->next) - { - ret = dwarf_lookup_pc (state, ddata, pc, callback, error_callback, - data, &found); - if (ret != 0 || found) - return ret; - } + return ret; + } + + // if we failed to obtain an entry in range, it can mean that the address map has been changed and new entries + // have been loaded in the meantime. Request a refresh and try again. + if (state->request_known_address_ranges_refresh_fn) + { + int new_range_count = state->request_known_address_ranges_refresh_fn(state, pc); + if (new_range_count > 0) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) + { + return ret; + } + } } + + } else { struct dwarf_data **pp; diff --git a/extern/tracy/libbacktrace/elf.cpp b/extern/tracy/libbacktrace/elf.cpp index c65bc4e7..e88a33b0 100644 --- a/extern/tracy/libbacktrace/elf.cpp +++ b/extern/tracy/libbacktrace/elf.cpp @@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include #ifdef HAVE_DL_ITERATE_PHDR #include @@ -5093,7 +5094,7 @@ elf_uncompress_chdr (struct backtrace_state *state, backtrace_error_callback error_callback, void *data, unsigned char **uncompressed, size_t *uncompressed_size) { - const b_elf_chdr *chdr; + b_elf_chdr chdr; char *alc; size_t alc_len; unsigned char *po; @@ -5105,27 +5106,30 @@ elf_uncompress_chdr (struct backtrace_state *state, if (compressed_size < sizeof (b_elf_chdr)) return 1; - chdr = (const b_elf_chdr *) compressed; + /* The lld linker can misalign a compressed section, so we can't safely read + the fields directly as we can for other ELF sections. 
See + https://github.com/ianlancetaylor/libbacktrace/pull/120. */ + memcpy (&chdr, compressed, sizeof (b_elf_chdr)); alc = NULL; alc_len = 0; - if (*uncompressed != NULL && *uncompressed_size >= chdr->ch_size) + if (*uncompressed != NULL && *uncompressed_size >= chdr.ch_size) po = *uncompressed; else { - alc_len = chdr->ch_size; + alc_len = chdr.ch_size; alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); if (alc == NULL) return 0; po = (unsigned char *) alc; } - switch (chdr->ch_type) + switch (chdr.ch_type) { case ELFCOMPRESS_ZLIB: if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr->ch_size)) + zdebug_table, po, chdr.ch_size)) goto skip; break; @@ -5133,7 +5137,7 @@ elf_uncompress_chdr (struct backtrace_state *state, if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), (unsigned char *)zdebug_table, po, - chdr->ch_size)) + chdr.ch_size)) goto skip; break; @@ -5143,7 +5147,7 @@ elf_uncompress_chdr (struct backtrace_state *state, } *uncompressed = po; - *uncompressed_size = chdr->ch_size; + *uncompressed_size = chdr.ch_size; return 1; @@ -5585,6 +5589,7 @@ elf_uncompress_lzma_block (const unsigned char *compressed, uint64_t header_compressed_size; uint64_t header_uncompressed_size; unsigned char lzma2_properties; + size_t crc_offset; uint32_t computed_crc; uint32_t stream_crc; size_t uncompressed_offset; @@ -5688,19 +5693,20 @@ elf_uncompress_lzma_block (const unsigned char *compressed, /* The properties describe the dictionary size, but we don't care what that is. */ - /* Block header padding. */ - if (unlikely (off + 4 > compressed_size)) + /* Skip to just before CRC, verifying zero bytes in between. 
*/ + crc_offset = block_header_offset + block_header_size - 4; + if (unlikely (crc_offset + 4 > compressed_size)) { elf_uncompress_failed (); return 0; } - - off = (off + 3) &~ (size_t) 3; - - if (unlikely (off + 4 > compressed_size)) + for (; off < crc_offset; off++) { - elf_uncompress_failed (); - return 0; + if (compressed[off] != 0) + { + elf_uncompress_failed (); + return 0; + } } /* Block header CRC. */ @@ -6518,8 +6524,9 @@ backtrace_uncompress_lzma (struct backtrace_state *state, static int elf_add (struct backtrace_state *state, const char *filename, int descriptor, const unsigned char *memory, size_t memory_size, - uintptr_t base_address, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn, int *found_sym, int *found_dwarf, + uintptr_t base_address, struct elf_ppc64_opd_data *caller_opd, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym, int *found_dwarf, struct dwarf_data **fileline_entry, int exe, int debuginfo, const char *with_buildid_data, uint32_t with_buildid_size) { @@ -6574,6 +6581,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, struct elf_view split_debug_view[DEBUG_MAX]; unsigned char split_debug_view_valid[DEBUG_MAX]; struct elf_ppc64_opd_data opd_data, *opd; + int opd_view_valid; struct dwarf_sections dwarf_sections; struct dwarf_data *fileline_altlink = NULL; @@ -6602,6 +6610,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, debug_view_valid = 0; memset (&split_debug_view_valid[0], 0, sizeof split_debug_view_valid); opd = NULL; + opd_view_valid = 0; if (!elf_get_view (state, descriptor, memory, memory_size, 0, sizeof ehdr, error_callback, data, &ehdr_view)) @@ -6885,12 +6894,18 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, opd->addr = shdr->sh_addr; opd->data = (const char *) opd_data.view.view.data; opd->size = shdr->sh_size; + opd_view_valid = 1; } } + /* A debuginfo 
file may not have a useful .opd section, but we can use the + one from the original executable. */ + if (opd == NULL) + opd = caller_opd; + if (symtab_shndx == 0) symtab_shndx = dynsym_shndx; - if (symtab_shndx != 0 && !debuginfo) + if (symtab_shndx != 0) { const b_elf_shdr *symtab_shdr; unsigned int strtab_shndx; @@ -6966,9 +6981,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -6983,12 +6998,6 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, buildid_view_valid = 0; } - if (opd) - { - elf_release_view (state, &opd->view, error_callback, data); - opd = NULL; - } - if (debuglink_name != NULL) { int d; @@ -7003,9 +7012,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -7030,7 +7039,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, { int ret; - ret = elf_add (state, filename, d, NULL, 0, 
base_address, + ret = elf_add (state, filename, d, NULL, 0, base_address, opd, error_callback, data, fileline_fn, found_sym, found_dwarf, &fileline_altlink, 0, 1, debugaltlink_buildid_data, debugaltlink_buildid_size); @@ -7067,7 +7076,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (ret) { ret = elf_add (state, filename, -1, gnu_debugdata_uncompressed, - gnu_debugdata_uncompressed_size, base_address, + gnu_debugdata_uncompressed_size, base_address, opd, error_callback, data, fileline_fn, found_sym, found_dwarf, NULL, 0, 0, NULL, 0); if (ret >= 0 && descriptor >= 0) @@ -7076,6 +7085,13 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } + if (opd_view_valid) + { + elf_release_view (state, &opd->view, error_callback, data); + opd_view_valid = 0; + opd = NULL; + } + /* Read all the debug sections in a single view, since they are probably adjacent in the file. If any of sections are uncompressed, we never release this view. 
*/ @@ -7322,7 +7338,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (split_debug_view_valid[i]) elf_release_view (state, &split_debug_view[i], error_callback, data); } - if (opd) + if (opd_view_valid) elf_release_view (state, &opd->view, error_callback, data); if (descriptor >= 0) backtrace_close (descriptor, error_callback, data); @@ -7350,13 +7366,37 @@ struct PhdrIterate { char* dlpi_name; ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; }; FastVector s_phdrData(16); +struct ElfAddrRange +{ + ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; +}; +FastVector s_sortedKnownElfRanges(16); + +static int address_in_known_elf_ranges(uintptr_t pc) +{ + auto it = std::lower_bound( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), pc, + []( const ElfAddrRange& lhs, const uintptr_t rhs ) { return uintptr_t(lhs.dlpi_addr) > rhs; } ); + if( it != s_sortedKnownElfRanges.end() && pc <= it->dlpi_end_addr ) + { + return true; + } + return false; +} + static int phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, void *pdata) { + if( address_in_known_elf_ranges(info->dlpi_addr) ) + { + return 0; + } + auto ptr = s_phdrData.push_next(); if (info->dlpi_name) { @@ -7366,6 +7406,12 @@ phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, } else ptr->dlpi_name = nullptr; ptr->dlpi_addr = info->dlpi_addr; + + // calculate the end address as well, so we can quickly determine if a PC is within the range of this image + ptr->dlpi_end_addr = uintptr_t(info->dlpi_addr) + (info->dlpi_phnum ? 
uintptr_t( + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz) : 0); + return 0; } @@ -7408,7 +7454,7 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } - if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, + if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, NULL, pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym, &found_dwarf, NULL, 0, 0, NULL, 0)) { @@ -7422,6 +7468,66 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } +static int elf_iterate_phdr_and_add_new_files(phdr_data *pd) +{ + assert(s_phdrData.empty()); + // dl_iterate_phdr, will only add entries for elf files loaded in a previously unseen range + dl_iterate_phdr(phdr_callback_mock, nullptr); + + if(s_phdrData.size() == 0) + { + return 0; + } + + uint32_t headersAdded = 0; + for (auto &v : s_phdrData) + { + phdr_callback(&v, (void *)pd); + + auto newEntry = s_sortedKnownElfRanges.push_next(); + newEntry->dlpi_addr = v.dlpi_addr; + newEntry->dlpi_end_addr = v.dlpi_end_addr; + + tracy_free(v.dlpi_name); + + headersAdded++; + } + + s_phdrData.clear(); + + std::sort( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), + []( const ElfAddrRange& lhs, const ElfAddrRange& rhs ) { return lhs.dlpi_addr > rhs.dlpi_addr; } ); + + return headersAdded; +} + +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT +/* Request an elf entry update if the pc passed in is not in any of the known elf ranges. 
+This could mean that new images were dlopened and we need to add those new elf entries */ +static int elf_refresh_address_ranges_if_needed(struct backtrace_state *state, uintptr_t pc) +{ + if ( address_in_known_elf_ranges(pc) ) + { + return 0; + } + + struct phdr_data pd; + int found_sym = 0; + int found_dwarf = 0; + fileline fileline_fn = nullptr; + pd.state = state; + pd.error_callback = nullptr; + pd.data = nullptr; + pd.fileline_fn = &fileline_fn; + pd.found_sym = &found_sym; + pd.found_dwarf = &found_dwarf; + pd.exe_filename = nullptr; + pd.exe_descriptor = -1; + + return elf_iterate_phdr_and_add_new_files(&pd); +} +#endif //#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + /* Initialize the backtrace data we need from an ELF executable. At the ELF level, all we need to do is find the debug info sections. */ @@ -7437,9 +7543,9 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, fileline elf_fileline_fn = elf_nodebug; struct phdr_data pd; - ret = elf_add (state, filename, descriptor, NULL, 0, 0, error_callback, data, - &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0, NULL, - 0); + ret = elf_add (state, filename, descriptor, NULL, 0, 0, NULL, error_callback, + data, &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0, + NULL, 0); if (!ret) return 0; @@ -7452,14 +7558,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, pd.exe_filename = filename; pd.exe_descriptor = ret < 0 ? 
descriptor : -1; - assert (s_phdrData.empty()); - dl_iterate_phdr (phdr_callback_mock, nullptr); - for (auto& v : s_phdrData) - { - phdr_callback (&v, (void *) &pd); - tracy_free (v.dlpi_name); - } - s_phdrData.clear(); + elf_iterate_phdr_and_add_new_files(&pd); if (!state->threaded) { @@ -7485,6 +7584,13 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, if (*fileline_fn == NULL || *fileline_fn == elf_nodebug) *fileline_fn = elf_fileline_fn; + // install an address range refresh callback so we can cope with dynamically loaded elf files +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + state->request_known_address_ranges_refresh_fn = elf_refresh_address_ranges_if_needed; +#else + state->request_known_address_ranges_refresh_fn = NULL; +#endif + return 1; } diff --git a/extern/tracy/libbacktrace/fileline.cpp b/extern/tracy/libbacktrace/fileline.cpp index 8645d754..5a37ff0c 100644 --- a/extern/tracy/libbacktrace/fileline.cpp +++ b/extern/tracy/libbacktrace/fileline.cpp @@ -47,6 +47,18 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ #include <mach-o/dyld.h> #endif +#ifdef HAVE_WINDOWS_H +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include <windows.h> +#endif + #include "backtrace.hpp" #include "internal.hpp" @@ -158,6 +170,47 @@ macho_get_executable_path (struct backtrace_state *state, #endif /* !defined (HAVE_MACH_O_DYLD_H) */ +#if HAVE_DECL__PGMPTR + +#define windows_executable_filename() _pgmptr + +#else /* !HAVE_DECL__PGMPTR */ + +#define windows_executable_filename() NULL + +#endif /* !HAVE_DECL__PGMPTR */ + +#ifdef HAVE_WINDOWS_H + +#define FILENAME_BUF_SIZE (MAX_PATH) + +static char * +windows_get_executable_path (char *buf, backtrace_error_callback error_callback, + void *data) +{ + size_t got; + int error; + + got = GetModuleFileNameA (NULL, buf, FILENAME_BUF_SIZE - 1); + error = GetLastError (); + if (got == 0 + || (got == FILENAME_BUF_SIZE - 1 && error == ERROR_INSUFFICIENT_BUFFER)) + { + error_callback (data, + "could not get the filename of the current executable", + error); + return NULL; + } + return buf; +} + +#else /* !defined (HAVE_WINDOWS_H) */ + +#define windows_get_executable_path(buf, error_callback, data) NULL +#define FILENAME_BUF_SIZE 64 + +#endif /* !defined (HAVE_WINDOWS_H) */ + /* Initialize the fileline information from the executable. Returns 1 on success, 0 on failure. 
*/ @@ -171,7 +224,7 @@ fileline_initialize (struct backtrace_state *state, int called_error_callback; int descriptor; const char *filename; - char buf[64]; + char buf[FILENAME_BUF_SIZE]; if (!state->threaded) failed = state->fileline_initialization_failed; @@ -195,7 +248,7 @@ fileline_initialize (struct backtrace_state *state, descriptor = -1; called_error_callback = 0; - for (pass = 0; pass < 8; ++pass) + for (pass = 0; pass < 10; ++pass) { int does_not_exist; @@ -208,25 +261,33 @@ fileline_initialize (struct backtrace_state *state, filename = getexecname (); break; case 2: - filename = "/proc/self/exe"; + /* Test this before /proc/self/exe, as the latter exists but points + to the wine binary (and thus doesn't work). */ + filename = windows_executable_filename (); break; case 3: - filename = "/proc/curproc/file"; + filename = "/proc/self/exe"; break; case 4: + filename = "/proc/curproc/file"; + break; + case 5: snprintf (buf, sizeof (buf), "/proc/%ld/object/a.out", (long) getpid ()); filename = buf; break; - case 5: + case 6: filename = sysctl_exec_name1 (state, error_callback, data); break; - case 6: + case 7: filename = sysctl_exec_name2 (state, error_callback, data); break; - case 7: + case 8: filename = macho_get_executable_path (state, error_callback, data); break; + case 9: + filename = windows_get_executable_path (buf, error_callback, data); + break; default: abort (); } diff --git a/extern/tracy/libbacktrace/internal.hpp b/extern/tracy/libbacktrace/internal.hpp index f871844b..fea298fa 100644 --- a/extern/tracy/libbacktrace/internal.hpp +++ b/extern/tracy/libbacktrace/internal.hpp @@ -133,6 +133,11 @@ typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, backtrace_syminfo_callback callback, backtrace_error_callback error_callback, void *data); +/* The type of the function that will trigger a known address range refresh + (if the pc passed in is for an address which lies outside of the known ranges) */ +typedef int 
(*request_known_address_ranges_refresh)(struct backtrace_state *state, + uintptr_t pc); + /* What the backtrace state pointer points to. */ struct backtrace_state @@ -159,6 +164,8 @@ struct backtrace_state int lock_alloc; /* The freelist when using mmap. */ struct backtrace_freelist_struct *freelist; + /* Trigger a known address range refresh */ + request_known_address_ranges_refresh request_known_address_ranges_refresh_fn; }; /* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST diff --git a/extern/tracy/tracy/Tracy.hpp b/extern/tracy/tracy/Tracy.hpp index 978eb5ef..e75d02ce 100644 --- a/extern/tracy/tracy/Tracy.hpp +++ b/extern/tracy/tracy/Tracy.hpp @@ -18,6 +18,8 @@ #ifndef TRACY_ENABLE +#define TracyNoop + #define ZoneNamed(x,y) #define ZoneNamedN(x,y,z) #define ZoneNamedC(x,y,z) @@ -33,8 +35,12 @@ #define ZoneText(x,y) #define ZoneTextV(x,y,z) +#define ZoneTextF(x,...) +#define ZoneTextVF(x,y,...) #define ZoneName(x,y) #define ZoneNameV(x,y,z) +#define ZoneNameF(x,...) +#define ZoneNameVF(x,y,...) 
#define ZoneColor(x) #define ZoneColorV(x,y) #define ZoneValue(x) @@ -109,9 +115,11 @@ #define TracyParameterRegister(x,y) #define TracyParameterSetup(x,y,z,w) #define TracyIsConnected false +#define TracyIsStarted false #define TracySetProgramName(x) #define TracyFiberEnter(x) +#define TracyFiberEnterHint(x,y) #define TracyFiberLeave #else @@ -122,6 +130,8 @@ #include "../client/TracyProfiler.hpp" #include "../client/TracyScoped.hpp" +#define TracyNoop tracy::ProfilerAvailable() + #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) # define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) @@ -130,6 +140,7 @@ # define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) # define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) +# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) #else # define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( 
&TracyConcat(__tracy_source_location,TracyLine), active ) # define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) @@ -138,6 +149,7 @@ # define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) # define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) +# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, active ) #endif #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) @@ -147,8 +159,12 @@ #define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) #define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) +#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) #define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) #define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) +#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneNameVF( varname, fmt, ... 
) varname.NameFmt( fmt, ##__VA_ARGS__ ) #define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) #define ZoneColorV( varname, color ) varname.Color( color ) #define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) @@ -274,7 +290,8 @@ #define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS -# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber ) +# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) +# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) # define TracyFiberLeave tracy::Profiler::LeaveFiber() #endif diff --git a/extern/tracy/tracy/TracyC.h b/extern/tracy/tracy/TracyC.h index 996889c4..8b447beb 100644 --- a/extern/tracy/tracy/TracyC.h +++ b/extern/tracy/tracy/TracyC.h @@ -39,6 +39,8 @@ TRACY_API void ___tracy_set_thread_name( const char* name ); typedef const void* TracyCZoneCtx; +typedef const void* TracyCLockCtx; + #define TracyCZone(c,x) #define TracyCZoneN(c,x,y) #define TracyCZoneC(c,x,y) @@ -96,7 +98,18 @@ typedef const void* TracyCZoneCtx; #define TracyCMessageCS(x,y,z,w) #define TracyCMessageLCS(x,y,z) +#define TracyCLockCtx(l) +#define TracyCLockAnnounce(l) +#define TracyCLockTerminate(l) +#define TracyCLockBeforeLock(l) +#define TracyCLockAfterLock(l) +#define TracyCLockAfterUnlock(l) +#define TracyCLockAfterTryLock(l,x) +#define TracyCLockMark(l) +#define TracyCLockCustomName(l,x,y) + #define TracyCIsConnected 0 +#define TracyCIsStarted 0 #ifdef TRACY_FIBERS # define TracyCFiberEnter(fiber) @@ -172,18 +185,31 @@ struct ___tracy_gpu_calibration_data { uint8_t context; }; +struct ___tracy_gpu_time_sync_data { + int64_t gpuTime; + uint8_t context; +}; + +struct __tracy_lockable_context_data; + // Some containers don't support storing const types. // This struct, as visible to user, is immutable, so treat it as if const was declared here. 
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; +typedef struct __tracy_lockable_context_data* TracyCLockCtx; #ifdef TRACY_MANUAL_LIFETIME TRACY_API void ___tracy_startup_profiler(void); TRACY_API void ___tracy_shutdown_profiler(void); +TRACY_API int ___tracy_profiler_started(void); + +# define TracyCIsStarted ___tracy_profiler_started() +#else +# define TracyCIsStarted 1 #endif -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ); -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); @@ -204,6 +230,7 @@ TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data ); TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); @@ -214,6 
+241,7 @@ TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_dat TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); TRACY_API int ___tracy_connected(void); @@ -351,6 +379,25 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); # define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color ) #endif + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ); +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ); + +#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( 
lock ); +#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock ); +#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock ); +#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock ); +#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired ); +#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); + #define TracyCIsConnected ___tracy_connected() #ifdef TRACY_FIBERS diff --git a/extern/tracy/tracy/TracyD3D11.hpp b/extern/tracy/tracy/TracyD3D11.hpp index 8aebdb26..3ed151bf 100644 --- a/extern/tracy/tracy/TracyD3D11.hpp +++ b/extern/tracy/tracy/TracyD3D11.hpp @@ -357,7 +357,7 @@ class D3D11ZoneScope private: tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) #ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) + : m_active( active && GetProfiler().IsConnected() ) #else : m_active( active ) #endif diff --git a/extern/tracy/tracy/TracyLua.hpp b/extern/tracy/tracy/TracyLua.hpp index c972ffb2..51dead51 100644 --- a/extern/tracy/tracy/TracyLua.hpp +++ b/extern/tracy/tracy/TracyLua.hpp @@ -188,6 +188,13 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth ) TracyQueueCommit( callstackAllocFatThread ); } +static inline void LuaShortenSrc( char* dst, const char* src ) +{ + size_t l = std::min( (size_t)255, strlen( src ) ); + memcpy( dst, src, l ); + dst[l] = 0; +} + static inline int LuaZoneBeginS( lua_State* L ) { #ifdef TRACY_ON_DEMAND @@ -207,7 +214,9 @@ static inline int LuaZoneBeginS( lua_State* L ) lua_Debug dbg; lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); - 
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -237,8 +246,10 @@ static inline int LuaZoneBeginNS( lua_State* L ) lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -264,7 +275,9 @@ static inline int LuaZoneBegin( lua_State* L ) lua_Debug dbg; lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); @@ -290,8 +303,10 @@ static inline int LuaZoneBeginN( lua_State* L ) lua_getstack( L, 1, &dbg ); lua_getinfo( L, "Snl", &dbg ); size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); const auto name = lua_tolstring( L, 1, &nsz ); - const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? 
dbg.name : dbg.short_src, name, nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); diff --git a/extern/tracy/tracy/TracyOpenCL.hpp b/extern/tracy/tracy/TracyOpenCL.hpp index 34466ccc..20d0a7ca 100644 --- a/extern/tracy/tracy/TracyOpenCL.hpp +++ b/extern/tracy/tracy/TracyOpenCL.hpp @@ -373,9 +373,9 @@ namespace tracy { using TracyCLCtx = tracy::OpenCLCtx*; -#define TracyCLContext(context, device) tracy::CreateCLContext(context, device); +#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device); #define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx); -#define TracyCLContextName(context, name, size) ctx->Name(name, size); +#define TracyCLContextName(ctx, name, size) ctx->Name(name, size); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); # define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); diff --git a/extern/tracy/tracy/TracyVulkan.hpp b/extern/tracy/tracy/TracyVulkan.hpp index 2d079f7b..c34b7185 100644 --- a/extern/tracy/tracy/TracyVulkan.hpp +++ b/extern/tracy/tracy/TracyVulkan.hpp @@ -61,7 +61,9 @@ namespace tracy Operation(vkResetQueryPool) #define LoadVkDeviceExtensionSymbols(Operation) \ - Operation(vkGetCalibratedTimestampsEXT) \ + 
Operation(vkGetCalibratedTimestampsEXT) + +#define LoadVkInstanceExtensionSymbols(Operation) \ Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) #define LoadVkInstanceCoreSymbols(Operation) \ @@ -72,6 +74,7 @@ struct VkSymbolTable #define MAKE_PFN(name) PFN_##name name; LoadVkDeviceCoreSymbols(MAKE_PFN) LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceExtensionSymbols(MAKE_PFN) LoadVkInstanceCoreSymbols(MAKE_PFN) #undef MAKE_PFN }; @@ -215,7 +218,9 @@ class VkCtx WriteInitialItem( physdev, tcpu, tgpu ); - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); } #endif @@ -280,17 +285,22 @@ class VkCtx } - if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) ) - { - m_oldCnt = cnt; - return; - } + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); - MemWrite( &item->gpuTime.gpuTime, m_res[idx] ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); MemWrite( &item->gpuTime.context, m_context ); Profiler::QueueSerialFinish(); @@ -320,7 +330,6 @@ class VkCtx m_tail += cnt; } -private: tracy_force_inline unsigned int NextQueryId() { const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); @@ -332,6 +341,12 @@ class VkCtx return m_context; } + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) { assert( m_timeDomain != 
VK_TIME_DOMAIN_DEVICE_EXT ); @@ -402,11 +417,11 @@ class VkCtx }; uint64_t ts[2]; uint64_t deviation[NumProbes]; - for( int i=0; i deviation[i] ) { minDeviation = deviation[i]; } @@ -457,6 +472,7 @@ class VkCtx LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) #undef VK_GET_DEVICE_SYMBOL #undef VK_LOAD_DEVICE_SYMBOL @@ -472,7 +488,9 @@ class VkCtx VkSymbolTable m_symbols; #endif uint64_t m_deviation; +#ifdef _WIN32 int64_t m_qpcToNs; +#endif int64_t m_prevCalibration; uint8_t m_context;