diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 6189caaa..921791e4 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -174,6 +174,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) { + ScopedCpuProfileZone(); if (ppvObject == nullptr) return E_POINTER; @@ -212,6 +213,7 @@ namespace dxvk { UINT STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() { + ScopedCpuProfileZone(); // This is not meant to be accurate. // The values are also wildly incorrect in d3d9... But some games rely // on this inaccurate value... @@ -232,6 +234,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) { + ScopedCpuProfileZone(); if (ppD3D9 == nullptr) return D3DERR_INVALIDCALL; @@ -241,11 +244,13 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) { + ScopedCpuProfileZone(); return m_adapter->GetDeviceCaps(m_deviceType, pCaps); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return D3DERR_INVALIDCALL; @@ -254,6 +259,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) { + ScopedCpuProfileZone(); if (pParameters == nullptr) return D3DERR_INVALIDCALL; @@ -270,6 +276,7 @@ namespace dxvk { UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9* pCursorBitmap) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pCursorBitmap == nullptr)) @@ -320,6 +327,7 @@ namespace dxvk { void STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); // I was not able to find an instance @@ -335,6 +343,7 @@ namespace dxvk { BOOL STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); 
return m_cursor.ShowCursor(bShow); @@ -344,11 +353,13 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain( D3DPRESENT_PARAMETERS* pPresentationParameters, IDirect3DSwapChain9** ppSwapChain) { + ScopedCpuProfileZone(); return CreateAdditionalSwapChainEx(pPresentationParameters, nullptr, ppSwapChain); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(pSwapChain); @@ -375,6 +386,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); HRESULT hr = ResetSwapChain(pPresentationParameters, nullptr); @@ -397,6 +409,7 @@ namespace dxvk { const RECT* pDestRect, HWND hDestWindowOverride, const RGNDATA* pDirtyRegion) { + ScopedCpuProfileZone(); return PresentEx( pSourceRect, pDestRect, @@ -411,6 +424,7 @@ namespace dxvk { UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9** ppBackBuffer) { + ScopedCpuProfileZone(); InitReturnPtr(ppBackBuffer); if (unlikely(iSwapChain != 0)) @@ -421,6 +435,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return D3DERR_INVALIDCALL; @@ -429,6 +444,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) { + ScopedCpuProfileZone(); return m_implicitSwapchain->SetDialogBoxMode(bEnableDialogs); } @@ -437,6 +453,7 @@ namespace dxvk { UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP* pRamp) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return; @@ -445,6 +462,7 @@ namespace dxvk { void STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return; @@ -461,6 +479,7 @@ namespace 
dxvk { D3DPOOL Pool, IDirect3DTexture9** ppTexture, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); InitReturnPtr(ppTexture); if (unlikely(ppTexture == nullptr)) @@ -519,6 +538,7 @@ namespace dxvk { D3DPOOL Pool, IDirect3DVolumeTexture9** ppVolumeTexture, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); InitReturnPtr(ppVolumeTexture); if (unlikely(ppVolumeTexture == nullptr)) @@ -567,6 +587,7 @@ namespace dxvk { D3DPOOL Pool, IDirect3DCubeTexture9** ppCubeTexture, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); InitReturnPtr(ppCubeTexture); if (unlikely(ppCubeTexture == nullptr)) @@ -614,6 +635,7 @@ namespace dxvk { D3DPOOL Pool, IDirect3DVertexBuffer9** ppVertexBuffer, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); InitReturnPtr(ppVertexBuffer); if (unlikely(ppVertexBuffer == nullptr)) @@ -653,6 +675,7 @@ namespace dxvk { D3DPOOL Pool, IDirect3DIndexBuffer9** ppIndexBuffer, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); InitReturnPtr(ppIndexBuffer); if (unlikely(ppIndexBuffer == nullptr)) @@ -693,6 +716,7 @@ namespace dxvk { BOOL Lockable, IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); return CreateRenderTargetEx( Width, Height, @@ -715,6 +739,7 @@ namespace dxvk { BOOL Discard, IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); return CreateDepthStencilSurfaceEx( Width, Height, @@ -733,6 +758,7 @@ namespace dxvk { const RECT* pSourceRect, IDirect3DSurface9* pDestinationSurface, const POINT* pDestPoint) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9Surface* src = static_cast(pSourceSurface); @@ -845,6 +871,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture( IDirect3DBaseTexture9* pSourceTexture, IDirect3DBaseTexture9* pDestinationTexture) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (!pDestinationTexture || !pSourceTexture) @@ -945,6 +972,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE 
D3D9DeviceEx::GetRenderTargetData( IDirect3DSurface9* pRenderTarget, IDirect3DSurface9* pDestSurface) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9Surface* src = static_cast(pRenderTarget); @@ -1004,6 +1032,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return D3DERR_INVALIDCALL; @@ -1017,6 +1046,7 @@ namespace dxvk { IDirect3DSurface9* pDestSurface, const RECT* pDestRect, D3DTEXTUREFILTERTYPE Filter) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9Surface* dst = static_cast(pDestSurface); @@ -1223,6 +1253,7 @@ namespace dxvk { IDirect3DSurface9* pSurface, const RECT* pRect, D3DCOLOR Color) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9Surface* dst = static_cast(pSurface); @@ -1304,6 +1335,7 @@ namespace dxvk { D3DPOOL Pool, IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle) { + ScopedCpuProfileZone(); return CreateOffscreenPlainSurfaceEx( Width, Height, Format, Pool, @@ -1315,6 +1347,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget( DWORD RenderTargetIndex, IDirect3DSurface9* pRenderTarget) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets @@ -1433,6 +1466,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget( DWORD RenderTargetIndex, IDirect3DSurface9** ppRenderTarget) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppRenderTarget); @@ -1450,6 +1484,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9Surface* ds = static_cast(pNewZStencil); @@ -1480,6 +1515,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** 
ppZStencilSurface) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppZStencilSurface); @@ -1499,6 +1535,7 @@ namespace dxvk { // Some games don't even call them. HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(m_flags.test(D3D9DeviceFlag::InScene))) @@ -1511,6 +1548,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(!m_flags.test(D3D9DeviceFlag::InScene))) @@ -1531,6 +1569,7 @@ namespace dxvk { D3DCOLOR Color, float Z, DWORD Stencil) { + ScopedCpuProfileZone(); if (unlikely(!Count && pRects)) return D3D_OK; @@ -1690,6 +1729,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) { + ScopedCpuProfileZone(); bool validState = (State >= D3DTS_VIEW && State <= D3DTS_PROJECTION) || (State >= D3DTS_TEXTURE0 && State <= D3DTS_TEXTURE7) || (State >= D3DTS_WORLDMATRIX(0) && State <= D3DTS_WORLDMATRIX(255)); if (!validState) return D3DERR_INVALIDCALL; @@ -1699,6 +1739,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pMatrix == nullptr)) @@ -1711,6 +1752,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) @@ -1730,6 +1772,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) @@ -1754,6 +1797,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) { + ScopedCpuProfileZone(); 
D3D9DeviceLock lock = LockDevice(); if (pViewport == nullptr) @@ -1766,6 +1810,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pMaterial == nullptr)) @@ -1782,6 +1827,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pMaterial == nullptr)) @@ -1794,6 +1840,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pLight == nullptr)) @@ -1822,6 +1869,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pLight == nullptr)) @@ -1837,6 +1885,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(Index >= m_state.lights.size())) @@ -1872,6 +1921,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pEnable == nullptr)) @@ -1887,6 +1937,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) @@ -1930,6 +1981,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK @@ -2258,6 +2310,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE 
D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pValue == nullptr)) @@ -2279,6 +2332,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock( D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9** ppSB) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppSB); @@ -2299,6 +2353,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(m_recorder != nullptr)) @@ -2311,6 +2366,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppSB); @@ -2338,6 +2394,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (ppTexture == nullptr) @@ -2357,6 +2414,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) { + ScopedCpuProfileZone(); if (unlikely(InvalidSampler(Stage))) return D3D_OK; @@ -2370,6 +2428,7 @@ namespace dxvk { DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD* pValue) { + ScopedCpuProfileZone(); auto dxvkType = RemapTextureStageStateType(Type); if (unlikely(pValue == nullptr)) @@ -2392,6 +2451,7 @@ namespace dxvk { DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value) { + ScopedCpuProfileZone(); return SetStateTextureStageState(Stage, RemapTextureStageStateType(Type), Value); } @@ -2400,6 +2460,7 @@ namespace dxvk { DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD* pValue) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pValue == nullptr)) @@ -2422,6 +2483,7 @@ namespace dxvk { DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { + ScopedCpuProfileZone(); if 
(unlikely(InvalidSampler(Sampler))) return D3D_OK; @@ -2464,6 +2526,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pRect == nullptr)) @@ -2484,6 +2547,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pRect == nullptr)) @@ -2784,6 +2848,7 @@ namespace dxvk { IDirect3DVertexBuffer9* pDestBuffer, IDirect3DVertexDeclaration9* pVertexDecl, DWORD Flags) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(pDestBuffer == nullptr || pVertexDecl == nullptr)) @@ -2891,6 +2956,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration( const D3DVERTEXELEMENT9* pVertexElements, IDirect3DVertexDeclaration9** ppDecl) { + ScopedCpuProfileZone(); InitReturnPtr(ppDecl); if (unlikely(ppDecl == nullptr || pVertexElements == nullptr)) @@ -2915,6 +2981,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9VertexDecl* decl = static_cast(pDecl); @@ -2942,6 +3009,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppDecl); @@ -2959,6 +3027,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (FVF == 0) @@ -2980,6 +3049,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (pFVF == nullptr) @@ -2996,6 +3066,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader( const DWORD* pFunction, IDirect3DVertexShader9** 
ppShader) { + ScopedCpuProfileZone(); // CreateVertexShader does not init the // return ptr unlike CreatePixelShader @@ -3020,6 +3091,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9VertexShader* shader = static_cast(pShader); @@ -3065,6 +3137,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppShader); @@ -3082,6 +3155,7 @@ namespace dxvk { UINT StartRegister, const float* pConstantData, UINT Vector4fCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< @@ -3097,6 +3171,7 @@ namespace dxvk { UINT StartRegister, float* pConstantData, UINT Vector4fCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3112,6 +3187,7 @@ namespace dxvk { UINT StartRegister, const int* pConstantData, UINT Vector4iCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< @@ -3127,6 +3203,7 @@ namespace dxvk { UINT StartRegister, int* pConstantData, UINT Vector4iCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3142,6 +3219,7 @@ namespace dxvk { UINT StartRegister, const BOOL* pConstantData, UINT BoolCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< @@ -3157,6 +3235,7 @@ namespace dxvk { UINT StartRegister, BOOL* pConstantData, UINT BoolCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3173,6 +3252,7 @@ namespace dxvk { IDirect3DVertexBuffer9* pStreamData, UINT OffsetInBytes, UINT Stride) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(StreamNumber >= caps::MaxStreams)) @@ -3213,6 +3293,7 @@ namespace dxvk { 
IDirect3DVertexBuffer9** ppStreamData, UINT* pOffsetInBytes, UINT* pStride) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppStreamData); @@ -3240,6 +3321,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(StreamNumber >= caps::MaxStreams)) @@ -3277,6 +3359,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(StreamNumber >= caps::MaxStreams)) @@ -3292,6 +3375,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9IndexBuffer* buffer = static_cast(pIndexData); @@ -3311,6 +3395,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppIndexData); @@ -3326,6 +3411,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader( const DWORD* pFunction, IDirect3DPixelShader9** ppShader) { + ScopedCpuProfileZone(); InitReturnPtr(ppShader); if (unlikely(ppShader == nullptr)) @@ -3349,6 +3435,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); D3D9PixelShader* shader = static_cast(pShader); @@ -3401,6 +3488,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppShader); @@ -3418,6 +3506,7 @@ namespace dxvk { UINT StartRegister, const float* pConstantData, UINT Vector4fCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return 
SetShaderConstants < @@ -3433,6 +3522,7 @@ namespace dxvk { UINT StartRegister, float* pConstantData, UINT Vector4fCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3448,6 +3538,7 @@ namespace dxvk { UINT StartRegister, const int* pConstantData, UINT Vector4iCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< @@ -3463,6 +3554,7 @@ namespace dxvk { UINT StartRegister, int* pConstantData, UINT Vector4iCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3478,6 +3570,7 @@ namespace dxvk { UINT StartRegister, const BOOL* pConstantData, UINT BoolCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< @@ -3493,6 +3586,7 @@ namespace dxvk { UINT StartRegister, BOOL* pConstantData, UINT BoolCount) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< @@ -3508,6 +3602,7 @@ namespace dxvk { UINT Handle, const float* pNumSegs, const D3DRECTPATCH_INFO* pRectPatchInfo) { + ScopedCpuProfileZone(); static bool s_errorShown = false; if (!std::exchange(s_errorShown, true)) @@ -3520,6 +3615,7 @@ namespace dxvk { UINT Handle, const float* pNumSegs, const D3DTRIPATCH_INFO* pTriPatchInfo) { + ScopedCpuProfileZone(); static bool s_errorShown = false; if (!std::exchange(s_errorShown, true)) @@ -3529,6 +3625,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) { + ScopedCpuProfileZone(); static bool s_errorShown = false; if (!std::exchange(s_errorShown, true)) @@ -3538,6 +3635,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) { + ScopedCpuProfileZone(); HRESULT hr = D3D9Query::QuerySupported(this, Type); if (ppQuery == nullptr || hr != D3D_OK) @@ -3594,6 +3692,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) { + 
ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return D3DERR_INVALIDCALL; @@ -3648,7 +3747,8 @@ namespace dxvk { const RGNDATA* pDirtyRegion, DWORD dwFlags) { FrameMark; - + ScopedCpuProfileZone(); + HRESULT result = m_implicitSwapchain->Present( pSourceRect, pDestRect, @@ -3670,6 +3770,7 @@ namespace dxvk { IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle, DWORD Usage) { + ScopedCpuProfileZone(); InitReturnPtr(ppSurface); if (unlikely(ppSurface == nullptr)) @@ -3714,6 +3815,7 @@ namespace dxvk { IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle, DWORD Usage) { + ScopedCpuProfileZone(); InitReturnPtr(ppSurface); if (unlikely(ppSurface == nullptr)) @@ -3763,6 +3865,7 @@ namespace dxvk { IDirect3DSurface9** ppSurface, HANDLE* pSharedHandle, DWORD Usage) { + ScopedCpuProfileZone(); InitReturnPtr(ppSurface); if (unlikely(ppSurface == nullptr)) @@ -3802,6 +3905,7 @@ namespace dxvk { HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx( D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); @@ -3816,6 +3920,7 @@ namespace dxvk { UINT iSwapChain, D3DDISPLAYMODEEX* pMode, D3DDISPLAYROTATION* pRotation) { + ScopedCpuProfileZone(); if (unlikely(iSwapChain != 0)) return D3DERR_INVALIDCALL; @@ -3827,6 +3932,7 @@ namespace dxvk { D3DPRESENT_PARAMETERS* pPresentationParameters, const D3DDISPLAYMODEEX* pFullscreenDisplayMode, IDirect3DSwapChain9** ppSwapChain) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppSwapChain); @@ -3861,6 +3967,7 @@ namespace dxvk { DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) @@ -3921,6 +4028,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { + ScopedCpuProfileZone(); 
D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) @@ -3992,6 +4100,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) @@ -4016,6 +4125,7 @@ namespace dxvk { DWORD Stage, D3D9TextureStageStateTypes Type, DWORD Value) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (unlikely(Stage >= caps::TextureStageCount)) @@ -4174,6 +4284,7 @@ namespace dxvk { void D3D9DeviceEx::PrepareTextures() { + ScopedCpuProfileZone(); const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask; const uint32_t usedTextureMask = m_activeTextures & usedSamplerMask; @@ -4280,6 +4391,7 @@ namespace dxvk { template D3D9BufferSlice D3D9DeviceEx::AllocTempBuffer(VkDeviceSize size) { + ScopedCpuProfileZone(); constexpr VkDeviceSize DefaultSize = 1 << 20; VkMemoryPropertyFlags memoryFlags @@ -4376,6 +4488,7 @@ namespace dxvk { bool D3D9DeviceEx::WaitForResource( const Rc& Resource, DWORD MapFlags) { + ScopedCpuProfileZone(); // Wait for the any pending D3D9 command to be executed // on the CS thread so that we can determine whether the // resource is currently in use or not. 
@@ -4443,6 +4556,7 @@ namespace dxvk { D3DLOCKED_BOX* pLockedBox, const D3DBOX* pBox, DWORD Flags) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); UINT Subresource = pResource->CalcSubresource(Face, MipLevel); @@ -4712,6 +4826,7 @@ namespace dxvk { D3D9CommonTexture* pResource, UINT Face, UINT MipLevel) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); UINT Subresource = pResource->CalcSubresource(Face, MipLevel); @@ -4753,6 +4868,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::FlushImage( D3D9CommonTexture* pResource, UINT Subresource) { + ScopedCpuProfileZone(); const Rc image = pResource->GetImage(); // Now that data has been written into the buffer, @@ -4874,6 +4990,7 @@ namespace dxvk { void D3D9DeviceEx::EmitGenerateMips( D3D9CommonTexture* pResource) { + ScopedCpuProfileZone(); if (pResource->IsManaged()) UploadManagedTexture(pResource); @@ -5057,6 +5174,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::UnlockBuffer( D3D9CommonBuffer* pResource) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); if (pResource->DecrementLockCount() != 0) @@ -5080,11 +5198,13 @@ namespace dxvk { void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { + ScopedCpuProfileZone(); m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); } void D3D9DeviceEx::ConsiderFlush(GpuFlushType FlushType) { + ScopedCpuProfileZone(); // NV-DXVK start: deterministic CI runs // While testing in CI, it's important to achieve timing determinism. if (s_explicitFlush) { @@ -5102,6 +5222,7 @@ namespace dxvk { void D3D9DeviceEx::SynchronizeCsThread() { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); // Dispatch current chunk so that all commands @@ -5113,6 +5234,7 @@ namespace dxvk { void D3D9DeviceEx::SetupFPU() { + ScopedCpuProfileZone(); // Should match d3d9 float behaviour. 
#if defined(_MSC_VER) @@ -5158,6 +5280,7 @@ namespace dxvk { int64_t D3D9DeviceEx::DetermineInitialTextureMemory() { + ScopedCpuProfileZone(); auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties(); VkDeviceSize availableTextureMemory = 0; @@ -5180,6 +5303,7 @@ namespace dxvk { VkDeviceSize Size, DxsoProgramType ShaderStage, DxsoConstantBuffers BufferType) { + ScopedCpuProfileZone(); DxvkBufferCreateInfo info = { }; info.usage = SSBO ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; info.access = SSBO ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_UNIFORM_READ_BIT; @@ -5218,6 +5342,7 @@ namespace dxvk { void D3D9DeviceEx::CreateConstantBuffers() { + ScopedCpuProfileZone(); if (!m_isSWVP) { m_consts[DxsoProgramTypes::VertexShader].buffer = CreateConstantBuffer(false, @@ -5267,6 +5392,7 @@ namespace dxvk { inline void D3D9DeviceEx::UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout) { + ScopedCpuProfileZone(); /* * SWVP raises the amount of constants by a lot. * To avoid copying huge amounts of data for every draw call, @@ -5325,6 +5451,7 @@ namespace dxvk { inline DxvkBufferSliceHandle D3D9DeviceEx::CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t size, bool useSSBO) { + ScopedCpuProfileZone(); uint32_t alignment = useSSBO ? m_robustSSBOAlignment : m_robustUBOAlignment; alignment = std::max(alignment, 64u); size = std::max(size, alignment); @@ -5351,6 +5478,7 @@ namespace dxvk { template inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { + ScopedCpuProfileZone(); /* * We just copy the float constants that have been set by the application and rely on robustness * to return 0 on OOB reads. 
@@ -5421,6 +5549,7 @@ namespace dxvk { template void D3D9DeviceEx::UploadConstants() { + ScopedCpuProfileZone(); if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) { if (CanSWVP()) return UploadSoftwareConstantSet(m_state.vsConsts, m_vsLayout); @@ -5456,7 +5585,10 @@ namespace dxvk { template void D3D9DeviceEx::UpdatePushConstant(const void* pData) { - struct ConstantData { uint8_t Data[Length]; }; + ScopedCpuProfileZone(); + struct ConstantData { + uint8_t Data[Length]; + }; auto* constData = reinterpret_cast(pData); @@ -5471,6 +5603,7 @@ namespace dxvk { template void D3D9DeviceEx::UpdatePushConstant() { + ScopedCpuProfileZone(); auto& rs = m_state.renderStates; if constexpr (Item == D3D9RenderStateItem::AlphaRef) { @@ -5531,6 +5664,7 @@ namespace dxvk { void D3D9DeviceEx::Flush() { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); m_initializer->Flush(); @@ -5558,6 +5692,7 @@ namespace dxvk { inline void D3D9DeviceEx::UpdateBoundRTs(uint32_t index) { + ScopedCpuProfileZone(); const uint32_t bit = 1 << index; m_boundRTs &= ~bit; @@ -5569,6 +5704,7 @@ namespace dxvk { inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { + ScopedCpuProfileZone(); const uint32_t bit = 1 << index; m_activeRTs &= ~bit; @@ -5583,6 +5719,7 @@ namespace dxvk { inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index, DWORD combinedUsage) { + ScopedCpuProfileZone(); const uint32_t bit = 1 << index; m_activeRTTextures &= ~bit; @@ -5617,6 +5754,7 @@ namespace dxvk { inline void D3D9DeviceEx::UpdateActiveHazardsRT(uint32_t rtMask) { + ScopedCpuProfileZone(); auto masks = m_psShaderMasks; masks.rtMask &= m_activeRTs & rtMask; masks.samplerMask &= m_activeRTTextures; @@ -5644,6 +5782,7 @@ namespace dxvk { inline void D3D9DeviceEx::UpdateActiveHazardsDS(uint32_t texMask) { + ScopedCpuProfileZone(); m_activeHazardsDS = m_activeHazardsDS & (~texMask); if (m_state.depthStencil != nullptr && m_state.depthStencil->GetBaseTexture() != nullptr) { @@ -5662,6 +5801,7 @@ 
namespace dxvk { void D3D9DeviceEx::MarkRenderHazards() { + ScopedCpuProfileZone(); for (uint32_t rtIdx : bit::BitMask(m_activeHazardsRT)) { // Guaranteed to not be nullptr... auto tex = m_state.renderTargets[rtIdx]->GetCommonTexture(); @@ -5674,6 +5814,7 @@ namespace dxvk { void D3D9DeviceEx::UploadManagedTexture(D3D9CommonTexture* pResource) { + ScopedCpuProfileZone(); for (uint32_t subresource = 0; subresource < pResource->CountSubresources(); subresource++) { if (!pResource->NeedsUpload(subresource) || pResource->GetBuffer(subresource) == nullptr) continue; @@ -5714,6 +5855,7 @@ namespace dxvk { void D3D9DeviceEx::MarkTextureMipsDirty(D3D9CommonTexture* pResource) { + ScopedCpuProfileZone(); pResource->SetNeedsMipGen(true); pResource->MarkAllWrittenByGPU(); @@ -5731,6 +5873,7 @@ namespace dxvk { void D3D9DeviceEx::MarkTextureMipsUnDirty(D3D9CommonTexture* pResource) { + ScopedCpuProfileZone(); pResource->SetNeedsMipGen(false); for (uint32_t i : bit::BitMask(m_activeTextures)) { @@ -5744,6 +5887,7 @@ namespace dxvk { void D3D9DeviceEx::MarkTextureUploaded(D3D9CommonTexture* pResource) { + ScopedCpuProfileZone(); for (uint32_t i : bit::BitMask(m_activeTextures)) { // Guaranteed to not be nullptr... 
auto texInfo = GetCommonTexture(m_state.textures[i]); @@ -5756,6 +5900,7 @@ namespace dxvk { template void D3D9DeviceEx::UpdatePointMode() { + ScopedCpuProfileZone(); if constexpr (!Points) { m_lastPointMode = 0; @@ -6206,6 +6351,7 @@ namespace dxvk { void D3D9DeviceEx::BindDepthStencilRefrence() { + ScopedCpuProfileZone(); auto& rs = m_state.renderStates; uint32_t ref = uint32_t(rs[D3DRS_STENCILREF]) & 0xff; @@ -6217,6 +6363,7 @@ namespace dxvk { void D3D9DeviceEx::BindSampler(DWORD Sampler) { + ScopedCpuProfileZone(); auto& state = m_state.samplerStates[Sampler]; const D3D9SamplerKey key = CreateSamplerKey(Sampler); @@ -6276,6 +6423,7 @@ namespace dxvk { void D3D9DeviceEx::UnbindTextures(uint32_t mask) { + ScopedCpuProfileZone(); EmitCs([ cMask = mask ](DxvkContext* ctx) { @@ -6302,7 +6450,8 @@ namespace dxvk { void D3D9DeviceEx::UndirtyTextures(uint32_t usedMask) { - const uint32_t activeMask = usedMask & m_activeTextures; + ScopedCpuProfileZone(); + const uint32_t activeMask = usedMask & m_activeTextures; const uint32_t inactiveMask = usedMask & ~m_activeTextures; for (uint32_t i : bit::BitMask(activeMask)) @@ -6328,6 +6477,7 @@ namespace dxvk { D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, UINT InstanceCount) { + ScopedCpuProfileZone(); D3D9DrawInfo drawInfo; drawInfo.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount); drawInfo.instanceCount = m_iaState.streamsInstanced & m_iaState.streamsUsed @@ -6338,6 +6488,7 @@ namespace dxvk { uint32_t D3D9DeviceEx::GetInstanceCount() const { + ScopedCpuProfileZone(); return std::max(m_state.streamFreq[0] & 0x7FFFFFu, 1u); } @@ -6540,6 +6691,7 @@ namespace dxvk { void D3D9DeviceEx::BindShader( const D3D9CommonShader* pShaderModule, D3D9ShaderPermutation Permutation) { + ScopedCpuProfileZone(); EmitCs([ cShader = pShaderModule->GetShader(Permutation) ] (DxvkContext* ctx) { @@ -6549,6 +6701,7 @@ namespace dxvk { void D3D9DeviceEx::BindInputLayout() { + ScopedCpuProfileZone(); 
m_flags.clr(D3D9DeviceFlag::DirtyInputLayout); if (m_state.vertexDecl == nullptr) { @@ -6665,6 +6818,7 @@ namespace dxvk { D3D9VertexBuffer* pBuffer, UINT Offset, UINT Stride) { + ScopedCpuProfileZone(); EmitCs([ cSlotId = Slot, cBufferSlice = pBuffer != nullptr ? @@ -6677,6 +6831,7 @@ namespace dxvk { } void D3D9DeviceEx::BindIndices() { + ScopedCpuProfileZone(); D3D9CommonBuffer* buffer = GetCommonBuffer(m_state.indices); D3D9Format format = buffer != nullptr @@ -6695,6 +6850,7 @@ namespace dxvk { void D3D9DeviceEx::Begin(D3D9Query* pQuery) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); // NV-DXVK start: Don't raytrace occlusion queries @@ -6710,6 +6866,7 @@ namespace dxvk { void D3D9DeviceEx::End(D3D9Query* pQuery) { + ScopedCpuProfileZone(); D3D9DeviceLock lock = LockDevice(); // NV-DXVK start: Don't raytrace occlusion queries @@ -6734,6 +6891,7 @@ namespace dxvk { void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + ScopedCpuProfileZone(); m_state.vsConsts.bConsts[idx] &= ~mask; m_state.vsConsts.bConsts[idx] |= bits & mask; @@ -6742,6 +6900,7 @@ namespace dxvk { void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + ScopedCpuProfileZone(); m_state.psConsts.bConsts[idx] &= ~mask; m_state.psConsts.bConsts[idx] |= bits & mask; @@ -6754,6 +6913,7 @@ namespace dxvk { VkShaderStageFlagBits ShaderStage, const DWORD* pShaderBytecode, const DxsoModuleInfo* pModuleInfo) { + ScopedCpuProfileZone(); try { m_shaderModules->GetShaderModule(this, pShaderModule, ShaderStage, pModuleInfo, pShaderBytecode); @@ -6775,6 +6935,7 @@ namespace dxvk { UINT StartRegister, const T* pConstantData, UINT Count) { + ScopedCpuProfileZone(); const uint32_t regCountHardware = DetermineHardwareRegCount(); constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount(); @@ -7245,6 +7406,7 @@ namespace dxvk { bool D3D9DeviceEx::UseProgrammableVS() { + ScopedCpuProfileZone(); return m_state.vertexShader != 
nullptr && m_state.vertexDecl != nullptr && !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT); @@ -7252,10 +7414,12 @@ namespace dxvk { bool D3D9DeviceEx::UseProgrammablePS() { + ScopedCpuProfileZone(); return m_state.pixelShader != nullptr; } void D3D9DeviceEx::UpdateBoolSpecConstantVertex(uint32_t value) { + ScopedCpuProfileZone(); if (value == m_lastBoolSpecConstantVertex) return; @@ -7268,6 +7432,7 @@ namespace dxvk { void D3D9DeviceEx::UpdateBoolSpecConstantPixel(uint32_t value) { + ScopedCpuProfileZone(); if (value == m_lastBoolSpecConstantPixel) return; @@ -7280,6 +7445,7 @@ namespace dxvk { void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) { + ScopedCpuProfileZone(); EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerType, cBitfield); }); @@ -7289,6 +7455,7 @@ namespace dxvk { void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) { + ScopedCpuProfileZone(); EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::ProjectionType, cBitfield); }); @@ -7307,6 +7474,7 @@ namespace dxvk { void D3D9DeviceEx::UpdateSamplerDepthModeSpecConstant(uint32_t value) { + ScopedCpuProfileZone(); EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerDepthMode, cBitfield); }); @@ -7318,6 +7486,7 @@ namespace dxvk { void D3D9DeviceEx::ApplyPrimitiveType( DxvkContext* pContext, D3DPRIMITIVETYPE PrimType) { + ScopedCpuProfileZone(); if (m_iaState.primitiveType != PrimType) { m_iaState.primitiveType = PrimType; @@ -7328,7 +7497,8 @@ namespace dxvk { void D3D9DeviceEx::ResolveZ() { - D3D9Surface* src = m_state.depthStencil.ptr(); + ScopedCpuProfileZone(); + D3D9Surface* src = m_state.depthStencil.ptr(); IDirect3DBaseTexture9* dst = m_state.textures[0]; if (unlikely(!src || !dst)) @@ -7415,6 +7585,7 @@ namespace dxvk { void 
D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) { + ScopedCpuProfileZone(); EmitCs([ cImage = pResource->GetImage(), cNewLayout = NewLayout @@ -7430,6 +7601,7 @@ namespace dxvk { const VkImageSubresourceRange* pSubresources, VkImageLayout OldLayout, VkImageLayout NewLayout) { + ScopedCpuProfileZone(); EmitCs([ cImage = pResource->GetImage(), cSubresources = *pSubresources, @@ -7444,6 +7616,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) { + ScopedCpuProfileZone(); if (!pPresentationParameters->EnableAutoDepthStencil) SetDepthStencilSurface(nullptr); @@ -7692,6 +7865,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + ScopedCpuProfileZone(); D3D9Format backBufferFmt = EnumerateFormat(pPresentationParameters->BackBufferFormat); Logger::info(str::format( "D3D9DeviceEx::ResetSwapChain:\n", @@ -7762,6 +7936,7 @@ namespace dxvk { HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + ScopedCpuProfileZone(); HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); if (FAILED(hr)) return hr; @@ -7777,8 +7952,8 @@ namespace dxvk { } // NV-DXVK start: external API - D3D9SwapchainExternal* D3D9DeviceEx::GetExternalPresenter() - { + D3D9SwapchainExternal* D3D9DeviceEx::GetExternalPresenter() { + ScopedCpuProfileZone(); if (m_withExternalSwapchain) { return static_cast(m_implicitSwapchain.ptr()); } @@ -7788,6 +7963,7 @@ namespace dxvk { void D3D9DeviceEx::TrackBufferMappingBufferSequenceNumber( D3D9CommonBuffer* pResource) { + ScopedCpuProfileZone(); uint64_t sequenceNumber = GetCurrentSequenceNumber(); pResource->TrackMappingBufferSequenceNumber(sequenceNumber); } @@ -7795,11 +7971,13 @@ namespace dxvk { void D3D9DeviceEx::TrackTextureMappingBufferSequenceNumber( D3D9CommonTexture* pResource, 
UINT Subresource) { + ScopedCpuProfileZone(); uint64_t sequenceNumber = GetCurrentSequenceNumber(); pResource->TrackMappingBufferSequenceNumber(Subresource, sequenceNumber); } uint64_t D3D9DeviceEx::GetCurrentSequenceNumber() { + ScopedCpuProfileZone(); // We do not flush empty chunks, so if we are tracking a resource // immediately after a flush, we need to use the sequence number // of the previously submitted chunk to prevent deadlocks. diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 6d21f325..fe232dd3 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -57,11 +57,13 @@ namespace dxvk { // NV-DXVK start: DLFG integration bool DxvkContext::isDLFGEnabled() const { + ScopedCpuProfileZone(); return m_common->metaNGXContext().supportsDLFG() && DxvkDLFG::enable() && !m_common->metaDLFG().hasDLFGFailed(); } // NV-DXVK end void DxvkContext::beginRecording(const Rc& cmdList) { + ScopedCpuProfileZone(); m_cmd = cmdList; m_cmd->beginRecording(); @@ -101,6 +103,7 @@ namespace dxvk { Rc DxvkContext::endRecording() { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->flushSharedImages(); @@ -114,6 +117,7 @@ namespace dxvk { void DxvkContext::flushCommandList() { + ScopedCpuProfileZone(); m_device->submitCommandList( this->endRecording(), VK_NULL_HANDLE, @@ -129,17 +133,20 @@ namespace dxvk { void DxvkContext::beginQuery(const Rc& query) { + ScopedCpuProfileZone(); m_queryManager.enableQuery(m_cmd, query); } void DxvkContext::endQuery(const Rc& query) { + ScopedCpuProfileZone(); m_queryManager.disableQuery(m_cmd, query); } void DxvkContext::bindRenderTargets( const DxvkRenderTargets& targets) { + ScopedCpuProfileZone(); // Set up default render pass ops m_state.om.renderTargets = targets; @@ -163,6 +170,7 @@ namespace dxvk { void DxvkContext::bindDrawBuffers( const DxvkBufferSlice& argBuffer, const DxvkBufferSlice& cntBuffer) { + ScopedCpuProfileZone(); m_state.id.argBuffer = argBuffer; m_state.id.cntBuffer = cntBuffer; 
@@ -173,6 +181,7 @@ namespace dxvk { void DxvkContext::bindIndexBuffer( const DxvkBufferSlice& buffer, VkIndexType indexType) { + ScopedCpuProfileZone(); if (!m_state.vi.indexBuffer.matchesBuffer(buffer)) m_vbTracked.clr(MaxNumVertexBindings); @@ -186,6 +195,7 @@ namespace dxvk { void DxvkContext::bindResourceBuffer( uint32_t slot, const DxvkBufferSlice& buffer) { + ScopedCpuProfileZone(); bool needsUpdate = !m_rc[slot].bufferSlice.matchesBuffer(buffer); if (likely(needsUpdate)) @@ -214,6 +224,7 @@ namespace dxvk { uint32_t slot, const Rc& imageView, const Rc& bufferView) { + ScopedCpuProfileZone(); m_rc[slot].imageView = imageView; m_rc[slot].bufferView = bufferView; m_rc[slot].bufferSlice = bufferView != nullptr @@ -231,6 +242,7 @@ namespace dxvk { void DxvkContext::bindResourceSampler( uint32_t slot, const Rc& sampler) { + ScopedCpuProfileZone(); m_rc[slot].sampler = sampler; m_rcTracked.clr(slot); @@ -245,6 +257,7 @@ namespace dxvk { void DxvkContext::bindAccelerationStructure( uint32_t slot, const Rc accelStructure) { + ScopedCpuProfileZone(); m_rc[slot].tlas = accelStructure->getAccelStructure(); m_rcTracked.clr(slot); @@ -259,6 +272,7 @@ namespace dxvk { void DxvkContext::bindRaytracingPipelineShaders( const DxvkRaytracingPipelineShaders& shaders) { + ScopedCpuProfileZone(); m_state.rp.shaders = shaders; @@ -273,6 +287,7 @@ namespace dxvk { void DxvkContext::bindShader( VkShaderStageFlagBits stage, const Rc& shader) { + ScopedCpuProfileZone(); Rc* shaderStage; switch (stage) { @@ -306,6 +321,7 @@ namespace dxvk { uint32_t binding, const DxvkBufferSlice& buffer, uint32_t stride) { + ScopedCpuProfileZone(); if (!m_state.vi.vertexBuffers[binding].matchesBuffer(buffer)) m_vbTracked.clr(binding); @@ -327,6 +343,7 @@ namespace dxvk { uint32_t binding, const DxvkBufferSlice& buffer, const DxvkBufferSlice& counter) { + ScopedCpuProfileZone(); if (!m_state.xfb.buffers[binding].matches(buffer) || !m_state.xfb.counters[binding].matches(counter)) { 
m_state.xfb.buffers[binding] = buffer; @@ -344,6 +361,7 @@ namespace dxvk { const VkComponentMapping& srcMapping, const VkImageBlit& region, VkFilter filter) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, dstImage, vk::makeSubresourceRange(region.dstSubresource)); this->prepareImage(m_execBarriers, srcImage, vk::makeSubresourceRange(region.srcSubresource)); @@ -377,6 +395,7 @@ namespace dxvk { void DxvkContext::changeImageLayout( const Rc& image, VkImageLayout layout) { + ScopedCpuProfileZone(); if (image->info().layout != layout) { this->spillRenderPass(true); @@ -406,6 +425,7 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize length, uint32_t value) { + ScopedCpuProfileZone(); this->spillRenderPass(true); length = align(length, sizeof(uint32_t)); @@ -435,6 +455,7 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize length, VkClearColorValue value) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->unbindComputePipeline(); @@ -511,6 +532,7 @@ namespace dxvk { const Rc& image, const VkClearColorValue& value, const VkImageSubresourceRange& subresources) { + ScopedCpuProfileZone(); this->spillRenderPass(false); VkImageLayout imageLayoutClear = image->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -541,6 +563,7 @@ namespace dxvk { const Rc& image, const VkClearDepthStencilValue& value, const VkImageSubresourceRange& subresources) { + ScopedCpuProfileZone(); this->spillRenderPass(false); m_execBarriers.recordCommands(m_cmd); @@ -573,6 +596,7 @@ namespace dxvk { void DxvkContext::clearCompressedColorImage( const Rc& image, const VkImageSubresourceRange& subresources) { + ScopedCpuProfileZone(); this->spillRenderPass(false); VkImageLayout layout = image->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -650,6 +674,7 @@ namespace dxvk { const Rc& imageView, VkImageAspectFlags clearAspects, VkClearValue clearValue) { + ScopedCpuProfileZone(); // Make sure the color components are ordered correctly if 
(clearAspects & VK_IMAGE_ASPECT_COLOR_BIT) { clearValue.color = util::swizzleClearColor(clearValue.color, @@ -706,6 +731,7 @@ namespace dxvk { VkExtent3D extent, VkImageAspectFlags aspect, VkClearValue value) { + ScopedCpuProfileZone(); const VkImageUsageFlags viewUsage = imageView->info().usage; if (aspect & VK_IMAGE_ASPECT_COLOR_BIT) { @@ -785,6 +811,7 @@ namespace dxvk { VkDeviceSize dstOffset, VkDeviceSize srcOffset, VkDeviceSize numBytes) { + ScopedCpuProfileZone(); VkDeviceSize loOvl = std::max(dstOffset, srcOffset); VkDeviceSize hiOvl = std::min(dstOffset, srcOffset) + numBytes; @@ -824,6 +851,7 @@ namespace dxvk { VkDeviceSize srcOffset, VkDeviceSize rowAlignment, VkDeviceSize sliceAlignment) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, dstImage, vk::makeSubresourceRange(dstSubresource)); @@ -894,6 +922,7 @@ namespace dxvk { VkImageSubresourceLayers srcSubresource, VkOffset3D srcOffset, VkExtent3D extent) { + ScopedCpuProfileZone(); this->spillRenderPass(true); if (this->copyImageClear(dstImage, dstSubresource, dstOffset, extent, srcImage, srcSubresource)) @@ -935,6 +964,7 @@ namespace dxvk { VkOffset3D dstOffset, VkOffset3D srcOffset, VkExtent3D extent) { + ScopedCpuProfileZone(); VkOffset3D loOvl = { std::max(dstOffset.x, srcOffset.x), std::max(dstOffset.y, srcOffset.y), @@ -1011,6 +1041,7 @@ namespace dxvk { VkImageSubresourceLayers srcSubresource, VkOffset3D srcOffset, VkExtent3D srcExtent) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, srcImage, vk::makeSubresourceRange(srcSubresource)); @@ -1077,6 +1108,7 @@ namespace dxvk { VkOffset2D srcOffset, VkExtent2D srcExtent, VkFormat format) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, srcImage, vk::makeSubresourceRange(srcSubresource)); @@ -1197,6 +1229,7 @@ namespace dxvk { VkExtent3D srcSize, VkExtent3D extent, VkDeviceSize elementSize) { + ScopedCpuProfileZone(); 
this->spillRenderPass(true); this->unbindComputePipeline(); @@ -1362,6 +1395,7 @@ namespace dxvk { VkOffset2D srcOffset, VkExtent2D srcExtent, VkFormat format) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, dstImage, vk::makeSubresourceRange(dstSubresource)); @@ -1538,6 +1572,7 @@ namespace dxvk { void DxvkContext::discardBuffer( const Rc& buffer) { + ScopedCpuProfileZone(); if (buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) return; @@ -1549,6 +1584,7 @@ namespace dxvk { void DxvkContext::discardImageView( const Rc& imageView, VkImageAspectFlags discardAspects) { + ScopedCpuProfileZone(); VkImageUsageFlags viewUsage = imageView->info().usage; // Ignore non-render target views since there's likely no good use case for @@ -1565,6 +1601,7 @@ namespace dxvk { uint32_t x, uint32_t y, uint32_t z) { + ScopedCpuProfileZone(); if (this->commitComputeState()) { this->commitComputeInitBarriers(); @@ -1585,6 +1622,7 @@ namespace dxvk { void DxvkContext::dispatchIndirect( VkDeviceSize offset) { + ScopedCpuProfileZone(); auto bufferSlice = m_state.id.argBuffer.getSliceHandle( offset, sizeof(VkDispatchIndirectCommand)); @@ -1623,6 +1661,7 @@ namespace dxvk { uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { m_cmd->cmdDraw( @@ -1638,6 +1677,7 @@ namespace dxvk { VkDeviceSize offset, uint32_t count, uint32_t stride) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { auto descriptor = m_state.id.argBuffer.getDescriptor(); @@ -1657,6 +1697,7 @@ namespace dxvk { VkDeviceSize countOffset, uint32_t maxCount, uint32_t stride) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { auto argDescriptor = m_state.id.argBuffer.getDescriptor(); auto cntDescriptor = m_state.id.cntBuffer.getDescriptor(); @@ -1679,6 +1720,7 @@ namespace dxvk { uint32_t firstIndex, uint32_t vertexOffset, uint32_t firstInstance) { + ScopedCpuProfileZone(); if 
(this->commitGraphicsState()) { m_cmd->cmdDrawIndexed( indexCount, instanceCount, @@ -1694,6 +1736,7 @@ namespace dxvk { VkDeviceSize offset, uint32_t count, uint32_t stride) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { auto descriptor = m_state.id.argBuffer.getDescriptor(); @@ -1712,6 +1755,7 @@ namespace dxvk { VkDeviceSize countOffset, uint32_t maxCount, uint32_t stride) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { auto argDescriptor = m_state.id.argBuffer.getDescriptor(); auto cntDescriptor = m_state.id.cntBuffer.getDescriptor(); @@ -1732,6 +1776,7 @@ namespace dxvk { const DxvkBufferSlice& counterBuffer, uint32_t counterDivisor, uint32_t counterBias) { + ScopedCpuProfileZone(); if (this->commitGraphicsState()) { auto physSlice = counterBuffer.getSliceHandle(); @@ -1747,6 +1792,7 @@ namespace dxvk { void DxvkContext::emitRenderTargetReadbackBarrier() { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::GpRenderPassBound)) { emitMemoryBarrier(VK_DEPENDENCY_BY_REGION_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, @@ -1761,6 +1807,7 @@ namespace dxvk { const Rc& image, const VkImageSubresourceRange& subresources, VkImageLayout initialLayout) { + ScopedCpuProfileZone(); m_execBarriers.accessImage(image, subresources, initialLayout, 0, 0, image->info().layout, @@ -1776,6 +1823,7 @@ namespace dxvk { void DxvkContext::generateMipmaps( const Rc& imageView, VkFilter filter) { + ScopedCpuProfileZone(); if (imageView->info().numLevels <= 1) return; @@ -1882,6 +1930,7 @@ namespace dxvk { void DxvkContext::invalidateBuffer( const Rc& buffer, const DxvkBufferSliceHandle& slice) { + ScopedCpuProfileZone(); // Allocate new backing resource DxvkBufferSliceHandle prevSlice = buffer->rename(slice); m_cmd->freeBufferSlice(buffer, prevSlice); @@ -1934,6 +1983,7 @@ namespace dxvk { uint32_t offset, uint32_t size, const void* data) { + ScopedCpuProfileZone(); assert(size + offset <= MaxPushConstantSize); // NV-DXVK start: multiple 
push const contexts std::memcpy(&m_state.pc.data[(uint32_t)m_state.pc.constantBank][offset], data, size); @@ -1945,6 +1995,7 @@ namespace dxvk { // NV-DXVK start: multiple push const contexts void DxvkContext::setPushConstantBank( DxvkPushConstantBank constantBank) { + ScopedCpuProfileZone(); if (constantBank >= DxvkPushConstantBank::Count) { Logger::err("DxvkContext: setPushConstantBank: invalid bank index"); return; @@ -1965,6 +2016,7 @@ namespace dxvk { const Rc& srcImage, const VkImageResolve& region, VkFormat format) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, dstImage, vk::makeSubresourceRange(region.dstSubresource)); this->prepareImage(m_execBarriers, srcImage, vk::makeSubresourceRange(region.srcSubresource)); @@ -1999,6 +2051,7 @@ namespace dxvk { const VkImageResolve& region, VkResolveModeFlagBitsKHR depthMode, VkResolveModeFlagBitsKHR stencilMode) { + ScopedCpuProfileZone(); this->spillRenderPass(true); this->prepareImage(m_execBarriers, dstImage, vk::makeSubresourceRange(region.dstSubresource)); this->prepareImage(m_execBarriers, srcImage, vk::makeSubresourceRange(region.srcSubresource)); @@ -2053,6 +2106,7 @@ namespace dxvk { const VkImageSubresourceRange& dstSubresources, VkImageLayout srcLayout, VkImageLayout dstLayout) { + ScopedCpuProfileZone(); this->spillRenderPass(false); if (srcLayout != dstLayout) { @@ -2078,6 +2132,7 @@ namespace dxvk { VkImageAspectFlags discardAspects, VkImageAspectFlags clearAspects, VkClearValue clearValue) { + ScopedCpuProfileZone(); DxvkColorAttachmentOps colorOp; colorOp.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; colorOp.loadLayout = imageView->imageInfo().layout; @@ -2207,6 +2262,7 @@ namespace dxvk { const Rc& imageView, VkImageAspectFlags clearAspects, VkClearValue clearValue) { + ScopedCpuProfileZone(); for (auto& entry : m_deferredClears) { if (entry.imageView->matchesView(imageView)) { entry.imageView = imageView; @@ -2233,6 +2289,7 @@ namespace dxvk { void 
DxvkContext::deferDiscard( const Rc& imageView, VkImageAspectFlags discardAspects) { + ScopedCpuProfileZone(); for (auto& entry : m_deferredClears) { if (entry.imageView->matchesView(imageView)) { entry.imageView = imageView; @@ -2251,6 +2308,7 @@ namespace dxvk { void DxvkContext::flushClears( bool useRenderPass) { + ScopedCpuProfileZone(); for (const auto& clear : m_deferredClears) { int32_t attachmentIndex = -1; @@ -2266,6 +2324,7 @@ namespace dxvk { void DxvkContext::flushSharedImages() { + ScopedCpuProfileZone(); for (auto i = m_deferredClears.begin(); i != m_deferredClears.end(); ) { if (i->imageView->imageInfo().shared) { this->performClear(i->imageView, -1, i->discardAspects, i->clearAspects, i->clearValue); @@ -2285,6 +2344,7 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, const void* data) { + ScopedCpuProfileZone(); bool replaceBuffer = this->tryInvalidateDeviceLocalBuffer(buffer, size); auto bufferSlice = buffer->getSliceHandle(offset, size); @@ -2324,6 +2384,7 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, const void* data) { + ScopedCpuProfileZone(); if (size < 65536 && size % 4 == 0) { updateBuffer(buffer, offset, size, data); @@ -2373,6 +2434,7 @@ namespace dxvk { const void* data, VkDeviceSize pitchPerRow, VkDeviceSize pitchPerLayer) { + ScopedCpuProfileZone(); this->spillRenderPass(true); // Upload data through a staging buffer. 
Special care needs to @@ -2464,6 +2526,7 @@ namespace dxvk { VkDeviceSize pitchPerRow, VkDeviceSize pitchPerLayer, VkFormat format) { + ScopedCpuProfileZone(); auto formatInfo = imageFormatInfo(format); VkExtent3D extent3D; @@ -2498,6 +2561,7 @@ namespace dxvk { const Rc& buffer, const void* data, uint32_t length) { + ScopedCpuProfileZone(); auto bufferSlice = buffer->getSliceHandle(); if (length == 0) @@ -2538,6 +2602,7 @@ namespace dxvk { const void* data, VkDeviceSize pitchPerRow, VkDeviceSize pitchPerLayer) { + ScopedCpuProfileZone(); const DxvkFormatInfo* formatInfo = image->formatInfo(); VkOffset3D imageOffset = { 0, 0, 0 }; @@ -2617,6 +2682,7 @@ namespace dxvk { uint32_t viewportCount, const VkViewport* viewports, const VkRect2D* scissorRects) { + ScopedCpuProfileZone(); if (m_state.gp.state.rs.viewportCount() != viewportCount) { m_state.gp.state.rs.setViewportCount(viewportCount); m_flags.set(DxvkContextFlag::GpDirtyPipelineState); @@ -2644,6 +2710,7 @@ namespace dxvk { void DxvkContext::setBlendConstants( DxvkBlendConstants blendConstants) { + ScopedCpuProfileZone(); if (m_state.dyn.blendConstants != blendConstants) { m_state.dyn.blendConstants = blendConstants; m_flags.set(DxvkContextFlag::GpDirtyBlendConstants); @@ -2653,6 +2720,7 @@ namespace dxvk { void DxvkContext::setDepthBias( DxvkDepthBias depthBias) { + ScopedCpuProfileZone(); if (m_state.dyn.depthBias != depthBias) { m_state.dyn.depthBias = depthBias; m_flags.set(DxvkContextFlag::GpDirtyDepthBias); @@ -2662,6 +2730,7 @@ namespace dxvk { void DxvkContext::setDepthBounds( DxvkDepthBounds depthBounds) { + ScopedCpuProfileZone(); if (m_state.dyn.depthBounds != depthBounds) { m_state.dyn.depthBounds = depthBounds; m_flags.set(DxvkContextFlag::GpDirtyDepthBounds); @@ -2676,6 +2745,7 @@ namespace dxvk { void DxvkContext::setStencilReference( uint32_t reference) { + ScopedCpuProfileZone(); if (m_state.dyn.stencilReference != reference) { m_state.dyn.stencilReference = reference; 
m_flags.set(DxvkContextFlag::GpDirtyStencilRef); @@ -2684,6 +2754,7 @@ namespace dxvk { void DxvkContext::setInputAssemblyState(const DxvkInputAssemblyState& ia) { + ScopedCpuProfileZone(); m_state.gp.state.ia = DxvkIaInfo( ia.primitiveTopology, ia.primitiveRestart, @@ -2698,6 +2769,7 @@ namespace dxvk { const DxvkVertexAttribute* attributes, uint32_t bindingCount, const DxvkVertexBinding* bindings) { + ScopedCpuProfileZone(); m_flags.set( DxvkContextFlag::GpDirtyPipelineState, DxvkContextFlag::GpDirtyVertexBuffers); @@ -2725,6 +2797,7 @@ namespace dxvk { void DxvkContext::setRasterizerState(const DxvkRasterizerState& rs) { + ScopedCpuProfileZone(); m_state.gp.state.rs = DxvkRsInfo( rs.depthClipEnable, rs.depthBiasEnable, @@ -2740,6 +2813,7 @@ namespace dxvk { void DxvkContext::setMultisampleState(const DxvkMultisampleState& ms) { + ScopedCpuProfileZone(); m_state.gp.state.ms = DxvkMsInfo( m_state.gp.state.ms.sampleCount(), ms.sampleMask, @@ -2750,6 +2824,7 @@ namespace dxvk { void DxvkContext::setDepthStencilState(const DxvkDepthStencilState& ds) { + ScopedCpuProfileZone(); m_state.gp.state.ds = DxvkDsInfo( ds.enableDepthTest, ds.enableDepthWrite, @@ -2765,6 +2840,7 @@ namespace dxvk { void DxvkContext::setLogicOpState(const DxvkLogicOpState& lo) { + ScopedCpuProfileZone(); m_state.gp.state.om = DxvkOmInfo( lo.enableLogicOp, lo.logicOp); @@ -2776,6 +2852,7 @@ namespace dxvk { void DxvkContext::setBlendMode( uint32_t attachment, const DxvkBlendMode& blendMode) { + ScopedCpuProfileZone(); m_state.gp.state.omBlend[attachment] = DxvkOmAttachmentBlend( blendMode.enableBlending, blendMode.colorSrcFactor, @@ -2794,6 +2871,7 @@ namespace dxvk { VkPipelineBindPoint pipeline, uint32_t index, uint32_t value) { + ScopedCpuProfileZone(); // NV-DXVK start: terrain baking static_assert(D3D9SpecConstantId::Count <= DxvkLimits::MaxNumSpecConstants); // NV-DXVK end @@ -2823,6 +2901,7 @@ namespace dxvk { void DxvkContext::signalGpuEvent(const Rc& event) { + ScopedCpuProfileZone(); 
this->spillRenderPass(true); DxvkGpuEventHandle handle = m_common->eventPool().allocEvent(); @@ -2839,6 +2918,7 @@ namespace dxvk { const VkCuLaunchInfoNVX& nvxLaunchInfo, const std::vector, DxvkAccessFlags>>& buffers, const std::vector, DxvkAccessFlags>>& images) { + ScopedCpuProfileZone(); // The resources in the std::vectors above are called-out // explicitly in the API for barrier and tracking purposes // since they're being used bindlessly. @@ -2903,16 +2983,19 @@ namespace dxvk { void DxvkContext::writeTimestamp(const Rc& query) { + ScopedCpuProfileZone(); m_queryManager.writeTimestamp(m_cmd, query); } void DxvkContext::signal(const Rc& signal, uint64_t value) { + ScopedCpuProfileZone(); m_cmd->queueSignal(signal, value); } void DxvkContext::beginDebugLabel(VkDebugUtilsLabelEXT *label) { + ScopedCpuProfileZone(); if (!m_device->instance()->extensions().extDebugUtils) return; @@ -2921,12 +3004,14 @@ namespace dxvk { // NV-DXVK start: Integrate Aftermath void DxvkContext::deviceDiagnosticCheckpoint(const void* data) { + ScopedCpuProfileZone(); if (m_device->extensions().nvDeviceDiagnosticCheckpoints) m_cmd->vkCmdSetCheckpointNV(data); } // NV-DXVK end void DxvkContext::endDebugLabel() { + ScopedCpuProfileZone(); if (!m_device->instance()->extensions().extDebugUtils) return; @@ -2934,6 +3019,7 @@ namespace dxvk { } void DxvkContext::insertDebugLabel(VkDebugUtilsLabelEXT *label) { + ScopedCpuProfileZone(); if (!m_device->instance()->extensions().extDebugUtils) return; @@ -2962,6 +3048,7 @@ namespace dxvk { const VkImageBlit& region, const VkComponentMapping& mapping, VkFilter filter) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); @@ -3133,6 +3220,7 @@ namespace dxvk { const Rc& srcImage, const VkImageBlit& region, VkFilter filter) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); 
auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); @@ -3204,6 +3292,7 @@ namespace dxvk { const DxvkBufferSliceHandle& bufferSlice, VkDeviceSize bufferRowAlignment, VkDeviceSize bufferSliceAlignment) { + ScopedCpuProfileZone(); auto formatInfo = image->formatInfo(); auto layers = imageSubresource.layerCount; @@ -3287,6 +3376,7 @@ namespace dxvk { const void* hostData, VkDeviceSize rowPitch, VkDeviceSize slicePitch) { + ScopedCpuProfileZone(); auto formatInfo = image->formatInfo(); auto srcData = reinterpret_cast(hostData); @@ -3335,6 +3425,7 @@ namespace dxvk { VkExtent3D extent, VkImageAspectFlags aspect, VkClearValue value) { + ScopedCpuProfileZone(); this->updateFramebuffer(); // Find out if the render target view is currently bound, @@ -3430,6 +3521,7 @@ namespace dxvk { void DxvkContext::clearAttachments(VkClearAttachment clearInfo, VkClearRect clearRect) { + ScopedCpuProfileZone(); m_cmd->cmdClearAttachments(1, &clearInfo, 1, &clearRect); } @@ -3438,6 +3530,7 @@ namespace dxvk { VkOffset3D offset, VkExtent3D extent, VkClearValue value) { + ScopedCpuProfileZone(); this->spillRenderPass(false); this->unbindComputePipeline(); @@ -3527,6 +3620,7 @@ namespace dxvk { VkImageSubresourceLayers srcSubresource, VkOffset3D srcOffset, VkExtent3D extent) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(srcSubresource); @@ -3625,6 +3719,7 @@ namespace dxvk { VkImageSubresourceLayers srcSubresource, VkOffset3D srcOffset, VkExtent3D extent) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(srcSubresource); @@ -3852,6 +3947,7 @@ namespace dxvk { VkExtent3D dstExtent, const Rc& srcImage, VkImageSubresourceLayers srcSubresource) { + ScopedCpuProfileZone(); // If the source image has a pending deferred clear, we can // implement the copy by clearing the 
destination image to // the same clear value. @@ -3985,6 +4081,7 @@ namespace dxvk { const VkImageResolve& region, VkResolveModeFlagBitsKHR depthMode, VkResolveModeFlagBitsKHR stencilMode) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); @@ -4048,6 +4145,7 @@ namespace dxvk { VkFormat format, VkResolveModeFlagBitsKHR depthMode, VkResolveModeFlagBitsKHR stencilMode) { + ScopedCpuProfileZone(); auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); @@ -4195,6 +4293,7 @@ namespace dxvk { void DxvkContext::startRenderPass() { + ScopedCpuProfileZone(); if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) { this->applyRenderTargetLoadLayouts(); this->flushClears(true); @@ -4227,6 +4326,7 @@ namespace dxvk { void DxvkContext::spillRenderPass(bool suspend) { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::GpRenderPassBound)) { m_flags.clr(DxvkContextFlag::GpRenderPassBound); @@ -4265,6 +4365,7 @@ namespace dxvk { const DxvkRenderPassOps& ops, uint32_t clearValueCount, const VkClearValue* clearValues) { + ScopedCpuProfileZone(); const DxvkFramebufferSize fbSize = framebufferInfo.size(); Rc framebuffer = this->lookupFramebuffer(framebufferInfo); @@ -4297,6 +4398,7 @@ namespace dxvk { void DxvkContext::renderPassUnbindFramebuffer() { + ScopedCpuProfileZone(); m_cmd->cmdEndRenderPass(); } @@ -4304,6 +4406,7 @@ namespace dxvk { void DxvkContext::resetRenderPassOps( const DxvkRenderTargets& renderTargets, DxvkRenderPassOps& renderPassOps) { + ScopedCpuProfileZone(); VkAccessFlags access = 0; if (renderTargets.depth.view != nullptr) { @@ -4341,6 +4444,7 @@ namespace dxvk { void DxvkContext::startTransformFeedback() { + ScopedCpuProfileZone(); if (!m_flags.test(DxvkContextFlag::GpXfbActive)) { m_flags.set(DxvkContextFlag::GpXfbActive); @@ 
-4368,6 +4472,7 @@ namespace dxvk { void DxvkContext::pauseTransformFeedback() { if (m_flags.test(DxvkContextFlag::GpXfbActive)) { + ScopedCpuProfileZone(); m_flags.clr(DxvkContextFlag::GpXfbActive); VkBuffer ctrBuffers[MaxNumXfbBuffers]; @@ -4393,6 +4498,7 @@ namespace dxvk { void DxvkContext::unbindComputePipeline() { + ScopedCpuProfileZone(); m_flags.set( DxvkContextFlag::CpDirtyPipeline, DxvkContextFlag::CpDirtyPipelineState, @@ -4403,6 +4509,7 @@ namespace dxvk { bool DxvkContext::updateComputePipeline() { + ScopedCpuProfileZone(); m_state.cp.pipeline = lookupComputePipeline(m_state.cp.shaders); if (unlikely(m_state.cp.pipeline == nullptr)) @@ -4417,7 +4524,8 @@ namespace dxvk { bool DxvkContext::updateComputePipelineState() { - m_cpActivePipeline = m_state.cp.pipeline->getPipelineHandle(m_state.cp.state); + ScopedCpuProfileZone(); + m_cpActivePipeline = m_state.cp.pipeline->getPipelineHandle(m_state.cp.state); if (unlikely(!m_cpActivePipeline)) return false; @@ -4442,6 +4550,7 @@ namespace dxvk { bool DxvkContext::updateRaytracingPipeline() { + ScopedCpuProfileZone(); m_state.rp.pipeline = lookupRaytracingPipeline(m_state.rp.shaders); if (unlikely(m_state.rp.pipeline == nullptr)) @@ -4456,6 +4565,7 @@ namespace dxvk { bool DxvkContext::updateRaytracingPipelineState() { + ScopedCpuProfileZone(); m_rpActivePipeline = m_state.rp.pipeline->getPipelineHandle(); @@ -4473,6 +4583,7 @@ namespace dxvk { void DxvkContext::unbindGraphicsPipeline() { + ScopedCpuProfileZone(); m_flags.set( DxvkContextFlag::GpDirtyPipeline, DxvkContextFlag::GpDirtyPipelineState, @@ -4491,6 +4602,7 @@ namespace dxvk { bool DxvkContext::updateGraphicsPipeline() { + ScopedCpuProfileZone(); m_state.gp.pipeline = lookupGraphicsPipeline(m_state.gp.shaders); if (unlikely(m_state.gp.pipeline == nullptr)) { @@ -4522,6 +4634,7 @@ namespace dxvk { bool DxvkContext::updateGraphicsPipelineState() { + ScopedCpuProfileZone(); // Set up vertex buffer strides for active bindings for (uint32_t i = 0; i < 
m_state.gp.state.il.bindingCount(); i++) { const uint32_t binding = m_state.gp.state.ilBindings[i].binding(); @@ -4568,6 +4681,7 @@ namespace dxvk { void DxvkContext::updateComputeShaderResources() { + ScopedCpuProfileZone(); if ((m_flags.test(DxvkContextFlag::CpDirtyResources)) || (m_state.cp.pipeline->layout()->hasStaticBufferBindings())) this->updateShaderResources(m_state.cp.pipeline->layout()); @@ -4581,6 +4695,7 @@ namespace dxvk { void DxvkContext::updateRaytracingShaderResources() { + ScopedCpuProfileZone(); if ((m_flags.test(DxvkContextFlag::RpDirtyResources)) || (m_state.rp.pipeline->layout()->hasStaticBufferBindings())) this->updateShaderResources(m_state.rp.pipeline->layout()); @@ -4594,6 +4709,7 @@ namespace dxvk { void DxvkContext::updateGraphicsShaderResources() { + ScopedCpuProfileZone(); if ((m_flags.test(DxvkContextFlag::GpDirtyResources)) || (m_state.gp.pipeline->layout()->hasStaticBufferBindings())) this->updateShaderResources(m_state.gp.pipeline->layout()); @@ -4608,6 +4724,7 @@ namespace dxvk { template void DxvkContext::updateShaderResources(const DxvkPipelineLayout* layout) { + ScopedCpuProfileZone(); std::array descriptors; // Assume that all bindings are active as a fast path @@ -4827,6 +4944,7 @@ namespace dxvk { void DxvkContext::updateShaderDescriptorSetBinding( VkDescriptorSet set, const DxvkPipelineLayout* layout) { + ScopedCpuProfileZone(); if (set) { std::array offsets; @@ -4849,6 +4967,7 @@ namespace dxvk { DxvkFramebufferInfo DxvkContext::makeFramebufferInfo( const DxvkRenderTargets& renderTargets) { + ScopedCpuProfileZone(); auto renderPassFormat = DxvkFramebufferInfo::getRenderPassFormat(renderTargets); auto renderPassObject = m_common->renderPassPool().getRenderPass(renderPassFormat); @@ -4857,6 +4976,7 @@ namespace dxvk { void DxvkContext::updateFramebuffer() { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::GpDirtyFramebuffer)) { m_flags.clr(DxvkContextFlag::GpDirtyFramebuffer); @@ -4884,6 +5004,7 @@ namespace 
dxvk { void DxvkContext::applyRenderTargetLoadLayouts() { + ScopedCpuProfileZone(); for (uint32_t i = 0; i < MaxNumRenderTargets; i++) m_state.om.renderPassOps.colorOps[i].loadLayout = m_rtLayouts.color[i]; @@ -4892,6 +5013,7 @@ namespace dxvk { void DxvkContext::applyRenderTargetStoreLayouts() { + ScopedCpuProfileZone(); for (uint32_t i = 0; i < MaxNumRenderTargets; i++) m_rtLayouts.color[i] = m_state.om.renderPassOps.colorOps[i].storeLayout; @@ -4902,6 +5024,7 @@ namespace dxvk { void DxvkContext::transitionRenderTargetLayouts( DxvkBarrierSet& barriers, bool sharedOnly) { + ScopedCpuProfileZone(); for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { const DxvkAttachment& color = m_state.om.framebufferInfo.getColorTarget(i); @@ -4924,6 +5047,7 @@ namespace dxvk { DxvkBarrierSet& barriers, const DxvkAttachment& attachment, VkImageLayout oldLayout) { + ScopedCpuProfileZone(); if (oldLayout != attachment.view->imageInfo().layout) { barriers.accessImage( attachment.view->image(), @@ -4943,6 +5067,7 @@ namespace dxvk { DxvkBarrierSet& barriers, const DxvkAttachment& attachment, VkImageLayout oldLayout) { + ScopedCpuProfileZone(); if (oldLayout != attachment.view->imageInfo().layout) { barriers.accessImage( attachment.view->image(), @@ -4963,6 +5088,7 @@ namespace dxvk { void DxvkContext::updateRenderTargetLayouts( const DxvkFramebufferInfo& newFb, const DxvkFramebufferInfo& oldFb) { + ScopedCpuProfileZone(); DxvkRenderTargetLayouts layouts = { }; for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { @@ -5022,6 +5148,7 @@ namespace dxvk { const Rc& image, const VkImageSubresourceRange& subresources, bool flushClears) { + ScopedCpuProfileZone(); // Images that can't be used as attachments are always in their // default layout, so we don't have to do anything in this case if (!(image->info().usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) @@ -5062,6 +5189,7 @@ namespace dxvk { } bool DxvkContext::updateIndexBufferBinding() { + 
ScopedCpuProfileZone(); if (unlikely(!m_state.vi.indexBuffer.defined())) return false; @@ -5081,6 +5209,7 @@ namespace dxvk { void DxvkContext::updateVertexBufferBindings() { + ScopedCpuProfileZone(); m_flags.clr(DxvkContextFlag::GpDirtyVertexBuffers); if (unlikely(!m_state.gp.state.il.bindingCount())) @@ -5130,6 +5259,7 @@ namespace dxvk { void DxvkContext::updateTransformFeedbackBuffers() { + ScopedCpuProfileZone(); auto gsOptions = m_state.gp.shaders.gs->shaderOptions(); VkBuffer xfbBuffers[MaxNumXfbBuffers]; @@ -5161,6 +5291,7 @@ namespace dxvk { void DxvkContext::updateTransformFeedbackState() { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::GpDirtyXfbBuffers)) { m_flags.clr(DxvkContextFlag::GpDirtyXfbBuffers); @@ -5173,6 +5304,7 @@ namespace dxvk { void DxvkContext::updateDynamicState() { + ScopedCpuProfileZone(); if (!m_gpActivePipeline) return; @@ -5222,6 +5354,7 @@ namespace dxvk { template void DxvkContext::updatePushConstants() { + ScopedCpuProfileZone(); m_flags.clr(DxvkContextFlag::DirtyPushConstants); auto layout = @@ -5250,6 +5383,7 @@ namespace dxvk { bool DxvkContext::commitComputeState() { + ScopedCpuProfileZone(); this->spillRenderPass(false); if (m_flags.test(DxvkContextFlag::CpDirtyPipeline)) { @@ -5275,6 +5409,7 @@ namespace dxvk { bool DxvkContext::commitRaytracingState() { + ScopedCpuProfileZone(); this->spillRenderPass(false); if (m_flags.test(DxvkContextFlag::RpDirtyPipeline)) { @@ -5301,6 +5436,7 @@ namespace dxvk { template bool DxvkContext::commitGraphicsState() { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::GpDirtyPipeline)) { if (unlikely(!this->updateGraphicsPipeline())) return false; @@ -5358,6 +5494,7 @@ namespace dxvk { // NV-DXVK start: Split out common post barriers logic void DxvkContext::commitPostBarriers(const DxvkDescriptorSlot binding, VkPipelineStageFlags stages) { + ScopedCpuProfileZone(); const DxvkShaderResourceSlot& slot = m_rc[binding.slot]; VkAccessFlags access = binding.access; @@ 
-5404,6 +5541,7 @@ namespace dxvk { // NV-DXVK start: Split out common init barriers logic bool DxvkContext::commitInitBarriers(const DxvkDescriptorSlot binding, VkPipelineStageFlags stages) { + ScopedCpuProfileZone(); const DxvkShaderResourceSlot& slot = m_rc[binding.slot]; DxvkAccessFlags dstAccess = DxvkBarrierSet::getAccessTypes(binding.access); @@ -5450,6 +5588,7 @@ namespace dxvk { void DxvkContext::commitComputeInitBarriers() { + ScopedCpuProfileZone(); auto layout = m_state.cp.pipeline->layout(); bool requiresBarrier = false; @@ -5469,6 +5608,7 @@ namespace dxvk { void DxvkContext::commitComputePostBarriers() { + ScopedCpuProfileZone(); auto layout = m_state.cp.pipeline->layout(); for (uint32_t i = 0; i < layout->bindingCount(); i++) { @@ -5484,6 +5624,7 @@ namespace dxvk { // NV-DXVK start: Ray tracing init/post barriers void DxvkContext::commitRaytracingInitBarriers() { + ScopedCpuProfileZone(); auto layout = m_state.rp.pipeline->layout(); bool requiresBarrier = false; @@ -5501,6 +5642,7 @@ namespace dxvk { void DxvkContext::commitRaytracingPostBarriers() { + ScopedCpuProfileZone(); auto layout = m_state.rp.pipeline->layout(); for (uint32_t i = 0; i < layout->bindingCount(); i++) { @@ -5515,6 +5657,7 @@ namespace dxvk { template void DxvkContext::commitGraphicsBarriers() { + ScopedCpuProfileZone(); if (m_barrierControl.test(DxvkBarrierControl::IgnoreGraphicsBarriers)) return; @@ -5725,6 +5868,7 @@ namespace dxvk { VkImageLayout dstLayout, VkPipelineStageFlags dstStages, VkAccessFlags dstAccess) { + ScopedCpuProfileZone(); if (m_execBarriers.isImageDirty(image, subresources, DxvkAccess::Write)) m_execBarriers.recordCommands(m_cmd); @@ -5742,6 +5886,7 @@ namespace dxvk { VkDescriptorSet DxvkContext::allocateDescriptorSet( VkDescriptorSetLayout layout, const char *name) { + ScopedCpuProfileZone(); if (m_descPool == nullptr) m_descPool = m_device->createDescriptorPool(); @@ -5759,6 +5904,7 @@ namespace dxvk { // NV-DXVK end void 
DxvkContext::traceRays(uint32_t width, uint32_t height, uint32_t depth) { + ScopedCpuProfileZone(); if (this->commitRaytracingState()) { this->commitRaytracingInitBarriers(); @@ -5783,6 +5929,7 @@ namespace dxvk { void DxvkContext::trackDrawBuffer() { + ScopedCpuProfileZone(); if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer)) { m_flags.clr(DxvkContextFlag::DirtyDrawBuffer); @@ -5798,6 +5945,7 @@ namespace dxvk { bool DxvkContext::tryInvalidateDeviceLocalBuffer( const Rc& buffer, VkDeviceSize copySize) { + ScopedCpuProfileZone(); // We can only discard if the full buffer gets written, and we will only discard // small buffers in order to not waste significant amounts of memory. if (copySize != buffer->info().size || copySize > 0x40000) @@ -5826,6 +5974,7 @@ namespace dxvk { DxvkGraphicsPipeline* DxvkContext::lookupGraphicsPipeline( const DxvkGraphicsPipelineShaders& shaders) { + ScopedCpuProfileZone(); auto idx = shaders.hash() % m_gpLookupCache.size(); if (unlikely(!m_gpLookupCache[idx] || !shaders.eq(m_gpLookupCache[idx]->shaders()))) @@ -5837,6 +5986,7 @@ namespace dxvk { DxvkComputePipeline* DxvkContext::lookupComputePipeline( const DxvkComputePipelineShaders& shaders) { + ScopedCpuProfileZone(); auto idx = shaders.hash() % m_cpLookupCache.size(); if (unlikely(!m_cpLookupCache[idx] || !shaders.eq(m_cpLookupCache[idx]->shaders()))) @@ -5848,6 +5998,7 @@ namespace dxvk { DxvkRaytracingPipeline* DxvkContext::lookupRaytracingPipeline( const DxvkRaytracingPipelineShaders& shaders) { + ScopedCpuProfileZone(); auto foundPipeline = m_rpLookupCache.find(shaders.hash()); if (unlikely(foundPipeline == m_rpLookupCache.end() || !shaders.eq(foundPipeline->second->shaders()))) { @@ -5862,6 +6013,7 @@ namespace dxvk { Rc DxvkContext::lookupFramebuffer( const DxvkFramebufferInfo& framebufferInfo) { + ScopedCpuProfileZone(); DxvkFramebufferKey key = framebufferInfo.key(); size_t idx = key.hash() % m_framebufferCache.size(); @@ -5874,6 +6026,7 @@ namespace dxvk { Rc 
DxvkContext::createZeroBuffer( VkDeviceSize size) { + ScopedCpuProfileZone(); if (m_zeroBuffer != nullptr && m_zeroBuffer->info().size >= size) return m_zeroBuffer; diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp index cfedfef5..78249b0b 100644 --- a/src/dxvk/dxvk_cs.cpp +++ b/src/dxvk/dxvk_cs.cpp @@ -25,6 +25,7 @@ namespace dxvk { void DxvkCsChunk::executeAll(DxvkContext* ctx) { + ScopedCpuProfileZone(); auto cmd = m_head; if (m_flags.test(DxvkCsChunkFlag::SingleUse)) { @@ -76,6 +77,7 @@ namespace dxvk { DxvkCsChunk* DxvkCsChunkPool::allocChunk(DxvkCsChunkFlags flags) { + ScopedCpuProfileZone(); DxvkCsChunk* chunk = nullptr; { std::lock_guard lock(m_mutex); @@ -95,6 +97,7 @@ namespace dxvk { void DxvkCsChunkPool::freeChunk(DxvkCsChunk* chunk) { + ScopedCpuProfileZone(); chunk->reset(); std::lock_guard lock(m_mutex); @@ -169,7 +172,9 @@ namespace dxvk { try { while (!m_stopped.load()) { - { std::unique_lock lock(m_mutex); + { + ScopedCpuProfileZoneN("waiting for work"); + std::unique_lock lock(m_mutex); if (chunk) { m_chunksExecuted++; m_condOnSync.notify_one(); diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h index 94f980df..f2c1f306 100644 --- a/src/dxvk/dxvk_cs.h +++ b/src/dxvk/dxvk_cs.h @@ -138,7 +138,9 @@ namespace dxvk { * Stores a list of commands. 
*/ class DxvkCsChunk : public RcObject { - constexpr static size_t MaxBlockSize = 16384; + // NV-DXVK start: we tend to send a lot less data through CS than vanilla DXVK; bigger numbers increase CS latency + constexpr static size_t MaxBlockSize = 4096; + // NV-DXVK end public: DxvkCsChunk(); diff --git a/src/dxvk/dxvk_scoped_annotation.cpp b/src/dxvk/dxvk_scoped_annotation.cpp index 06550209..c609e96c 100644 --- a/src/dxvk/dxvk_scoped_annotation.cpp +++ b/src/dxvk/dxvk_scoped_annotation.cpp @@ -26,6 +26,10 @@ #include "dxvk_device.h" #include "client/TracyProfiler.hpp" +#include <new> +// Global overload +TRACY_OBJECT_MEMORY_PROFILING + namespace dxvk { __ScopedAnnotation::__ScopedAnnotation(Rc ctx, const char* name) : m_ctx(ctx) { diff --git a/src/dxvk/dxvk_scoped_annotation.h b/src/dxvk/dxvk_scoped_annotation.h index b2ad60e9..e78032b5 100644 --- a/src/dxvk/dxvk_scoped_annotation.h +++ b/src/dxvk/dxvk_scoped_annotation.h @@ -49,9 +49,26 @@ ZoneText(name, std::strlen(name)); \ TracyVkZoneTransient((ctx)->getDevice()->queues().graphics.tracyCtx, TracyConcat(__tracy_gpu_source_location,__LINE__), (ctx)->getCmdBuffer(DxvkCmdBuffer::ExecBuffer), name, true); \ __ScopedAnnotation __scopedAnnotation(ctx, name) + + // Replaces the global scalar operator new/delete so heap allocations are reported to Tracy via TracyAlloc/TracyFree. + // The definitions are not inline: expand this macro in exactly one translation unit (requires <new> for std::bad_alloc). + #define TRACY_OBJECT_MEMORY_PROFILING \ + void * operator new ( std :: size_t count ) { \ + auto ptr = malloc(count != 0 ? count : 1); \ + if (ptr == nullptr) throw std::bad_alloc(); \ + TracyAlloc(ptr, count); \ + return ptr; \ + } \ + void operator delete (void* ptr) noexcept { \ + if (ptr == nullptr) return; \ + TracyFree(ptr); \ + free(ptr); \ + } + #else #define ScopedCpuProfileZoneDynamic(ctx, name) #define ScopedGpuProfileZoneDynamicZ(ctx, name) + #define TRACY_OBJECT_MEMORY_PROFILING #endif namespace dxvk { diff --git a/src/dxvk/rtx_render/rtx_types.cpp b/src/dxvk/rtx_render/rtx_types.cpp index bb4d616c..c1226fcd 100644 --- a/src/dxvk/rtx_render/rtx_types.cpp +++ b/src/dxvk/rtx_render/rtx_types.cpp @@ -25,6 +25,7 @@ #include "rtx_options.h" #include "rtx_terrain_baker.h" #include "rtx_instance_manager.h" +#include "dxvk_scoped_annotation.h" namespace dxvk {
uint32_t RasterGeometry::calculatePrimitiveCount() const { @@ -46,6 +47,7 @@ namespace dxvk { } bool DrawCallState::finalizePendingFutures(const RtCamera* pLastCamera) { + ScopedCpuProfileZone(); // Geometry hashes are vital, and cannot be disabled, so its important we get valid data (hence the return type) const bool valid = finalizeGeometryHashes(); if (valid) {