From 606947dd115f477ac0e87026684bfb04166b94fe Mon Sep 17 00:00:00 2001 From: Wang_Pingli Date: Fri, 15 Nov 2024 21:10:21 +0800 Subject: [PATCH] [Media Common][VP][HWS2] batch buffer cmd for gpu sync * [Media Common] batch buffer cmd for gpu sync Create an mhwInterface in MOS_INTERFACE. Destroy it in Mos_Ve_Specific_Destroy. Only create mi_itf if m_isMos is set. Allocate a sync buffer associated with each cmd buffer. Allocate, Free, Bind, Unbind the sync buffer along with the command buffer. Include the MI_BATCH_BUFFER_START size when verifying the command buffer size. When the HAL sets the GPU context, also set MOS_INTERFACE::mhwInterface on the GPU context. The GPU context later uses it to add sync cmds. When the HAL gets a command buffer, return the sync batch buffer too. Add MI_BATCH_BUFFER_START in Mhw_SendGenericPrologCmdNext. If a sync is needed, add the fence info, including the wait value, to the m_waitForFenceList list in the GPU context. Before submitting the command buffer to the hwqueue, call WaitInSyncBatchBuffer to add the sync cmds to the sync batch buffer. For scalability, set the sync batch buffer of the primary cmd buffer to the virtual engine batch buffer. 
--- media_common/agnostic/common/hw/mhw_mi.h | 2 + media_common/agnostic/common/os/mos_os.h | 9 +++- .../linux/common/os/mos_os_specific.c | 7 +++ .../agnostic/common/hw/mhw_mi_cmdpar.h | 3 ++ .../agnostic/common/hw/mhw_mi_impl.h | 51 +++++++++++++------ media_softlet/agnostic/common/hw/mhw_mi_itf.h | 7 +++ .../agnostic/common/hw/mhw_mmio_common.h | 22 ++++++++ .../agnostic/common/hw/mhw_utilities_next.cpp | 9 +++- .../agnostic/common/hw/mhw_utilities_next.h | 7 +++ .../media_interfaces_mhw_next.h | 1 + .../agnostic/common/os/mos_interface.h | 1 + .../linux/common/os/mos_interface.cpp | 4 ++ .../common/os/private/mos_os_specific.cpp | 6 +++ 13 files changed, 110 insertions(+), 19 deletions(-) diff --git a/media_common/agnostic/common/hw/mhw_mi.h b/media_common/agnostic/common/hw/mhw_mi.h index 6156b6d253f..bbd189f4f96 100644 --- a/media_common/agnostic/common/hw/mhw_mi.h +++ b/media_common/agnostic/common/hw/mhw_mi.h @@ -281,11 +281,13 @@ typedef struct _MHW_MI_ATOMIC_PARAMS typedef struct _MHW_MI_SEMAPHORE_WAIT_PARAMS { PMOS_RESOURCE presSemaphoreMem; // Semaphore memory Resource + uint64_t gpuVirtualAddress; // Semaphore memory Resource uint32_t dwResourceOffset; bool bRegisterPollMode; bool bPollingWaitMode; uint32_t dwCompareOperation; uint32_t dwSemaphoreData; + bool b64bComparEnableWithGPR; MHW_COMMON_MI_SEMAPHORE_COMPARE_OPERATION CompareOperation; }MHW_MI_SEMAPHORE_WAIT_PARAMS, *PMHW_MI_SEMAPHORE_WAIT_PARAMS; diff --git a/media_common/agnostic/common/os/mos_os.h b/media_common/agnostic/common/os/mos_os.h index 822d937af30..1fc15244e4b 100644 --- a/media_common/agnostic/common/os/mos_os.h +++ b/media_common/agnostic/common/os/mos_os.h @@ -50,6 +50,7 @@ #include "mos_oca_interface.h" #include "mos_cache_manager.h" +class MhwInterfacesNext; #define MOS_NAL_UNIT_LENGTH 4 #define MOS_NAL_UNIT_STARTCODE_LENGTH 3 #define MOS_MAX_PATH_LENGTH 256 @@ -254,12 +255,15 @@ typedef int32_t MOS_SUBMISSION_TYPE; #define EXTRA_PADDING_NEEDED 4096 #define 
MEDIA_CMF_UNCOMPRESSED_WRITE 0xC +struct _MHW_BATCH_BUFFER; +typedef struct _MHW_BATCH_BUFFER MHW_BATCH_BUFFER, * PMHW_BATCH_BUFFER; //! //! \brief Structure to command buffer //! typedef struct _MOS_COMMAND_BUFFER { MOS_RESOURCE OsResource; //!< OS Resource + PMHW_BATCH_BUFFER syncMhwBatchBuffer; //!< Pointer to sync mhw batch buffer // Common fields uint32_t *pCmdBase; //!< Base address (CPU) @@ -573,7 +577,7 @@ struct MosStreamState uint32_t dwEnableMediaSoloFrameNum = 0; //!< The frame number at which MediaSolo will be enabled, 0 is not valid. int32_t bSoloInUse = 0; //!< Flag to indicate if MediaSolo is enabled #endif // MOS_MEDIASOLO_SUPPORTED - + MhwInterfacesNext *mhwInterface = nullptr; }; // OS agnostic MOS objects @@ -2056,6 +2060,8 @@ typedef struct _MOS_INTERFACE bool (*pfnGetCacheSetting)(MOS_COMPONENT id, uint32_t feature, bool bOut, ENGINE_TYPE engineType, MOS_CACHE_ELEMENT &element, bool isHeapSurf); + bool (* pfnIsGpuSyncByCmd) (PMOS_INTERFACE osInterface); + // Virtual Engine related int32_t bSupportVirtualEngine; //!< Enable virtual engine flag int32_t bUseHwSemaForResSyncInVE; //!< Flag to indicate if UMD need to send HW sema cmd under this OS when there is a resource sync need with Virtual Engine interface @@ -2094,6 +2100,7 @@ typedef struct _MOS_INTERFACE //!< os interface extension void *pOsExt; + MhwInterfacesNext *mhwInterface; } MOS_INTERFACE; #ifdef __cplusplus diff --git a/media_driver/linux/common/os/mos_os_specific.c b/media_driver/linux/common/os/mos_os_specific.c index 488449aa021..2ff9ebef9da 100644 --- a/media_driver/linux/common/os/mos_os_specific.c +++ b/media_driver/linux/common/os/mos_os_specific.c @@ -7092,6 +7092,12 @@ bool Mos_Specific_IsAsyncDevice(PMOS_INTERFACE pOsInterface) return false; } +bool Mos_Specific_IsGpuSyncByCmd( + PMOS_INTERFACE osInterface) +{ + return false; +} + //! \brief Unified OS Initializes OS Linux Interface //! \details Linux OS Interface initilization //! 
\param PMOS_INTERFACE pOsInterface @@ -7270,6 +7276,7 @@ MOS_STATUS Mos_Specific_InitInterface( pOsInterface->pfnInitCmInterface = InitCmOsDDIInterface; pOsInterface->pfnIsAsynDevice = Mos_Specific_IsAsyncDevice; + pOsInterface->pfnIsGpuSyncByCmd = Mos_Specific_IsGpuSyncByCmd; #if (_DEBUG || _RELEASE_INTERNAL) pOsInterface->pfnGetEngineLogicId = Mos_Specific_GetEngineLogicId; diff --git a/media_softlet/agnostic/common/hw/mhw_mi_cmdpar.h b/media_softlet/agnostic/common/hw/mhw_mi_cmdpar.h index dbe3f6130bd..1ccbeeb980b 100644 --- a/media_softlet/agnostic/common/hw/mhw_mi_cmdpar.h +++ b/media_softlet/agnostic/common/hw/mhw_mi_cmdpar.h @@ -164,11 +164,13 @@ namespace mi struct _MHW_PAR_T(MI_SEMAPHORE_WAIT) { PMOS_RESOURCE presSemaphoreMem = nullptr; // Semaphore memory Resource + uint64_t gpuVirtualAddress = 0; uint32_t dwResourceOffset = 0; bool bRegisterPollMode = false; bool bPollingWaitMode = false; uint32_t dwCompareOperation = 0; uint32_t dwSemaphoreData = 0; + bool b64bCompareEnableWithGPR = 0; MHW_COMMON_MI_SEMAPHORE_COMPARE_OPERATION CompareOperation = {}; }; @@ -238,6 +240,7 @@ namespace mi { uint32_t dwRegister = 0; uint32_t dwData = 0; + bool bMMIORemap = 0; }; struct _MHW_PAR_T(MI_LOAD_REGISTER_REG) diff --git a/media_softlet/agnostic/common/hw/mhw_mi_impl.h b/media_softlet/agnostic/common/hw/mhw_mi_impl.h index 4594ee7b4e5..500a7c25b96 100644 --- a/media_softlet/agnostic/common/hw/mhw_mi_impl.h +++ b/media_softlet/agnostic/common/hw/mhw_mi_impl.h @@ -292,12 +292,21 @@ class Impl : public Itf, public mhw::Impl return MOS_STATUS_SUCCESS; } + virtual MOS_STATUS AddWaitInSyncBatchBuffer( + uint64_t fenceTokenValue, + uint64_t gpuVirtualAddress, + uint64_t waitValue, + MHW_BATCH_BUFFER &batchBuffer, + MHW_SEMAPHORE_WATI_REGISTERS &tokenRegister) override + { + return MOS_STATUS_SUCCESS; + } + protected: using base_t = Itf; MHW_MI_MMIOREGISTERS m_mmioRegisters = {}; MhwCpInterface *m_cpInterface = nullptr; - public: Impl(PMOS_INTERFACE osItf) : mhw::Impl(osItf) 
{ @@ -308,21 +317,31 @@ class Impl : public Itf, public mhw::Impl { _MHW_SETCMD_CALLBASE(MI_SEMAPHORE_WAIT); - MHW_MI_CHK_NULL(this->m_currentCmdBuf); - MHW_MI_CHK_NULL(params.presSemaphoreMem); - - MHW_RESOURCE_PARAMS resourceParams ={}; - resourceParams.presResource = params.presSemaphoreMem; - resourceParams.dwOffset = params.dwResourceOffset; - resourceParams.pdwCmd = cmd.DW2_3.Value; - resourceParams.dwLocationInCmd = _MHW_CMD_DW_LOCATION(DW2_3.Value);; - resourceParams.dwLsbNum = MHW_COMMON_MI_GENERAL_SHIFT; - resourceParams.HwCommandType = MOS_MI_SEMAPHORE_WAIT; - - MHW_MI_CHK_STATUS(AddResourceToCmd( - this->m_osItf, - this->m_currentCmdBuf, - &resourceParams)); + if(params.presSemaphoreMem) + { + MHW_MI_CHK_NULL(this->m_currentCmdBuf); + MHW_RESOURCE_PARAMS resourceParams ={}; + resourceParams.presResource = params.presSemaphoreMem; + resourceParams.dwOffset = params.dwResourceOffset; + resourceParams.pdwCmd = cmd.DW2_3.Value; + resourceParams.dwLocationInCmd = _MHW_CMD_DW_LOCATION(DW2_3.Value);; + resourceParams.dwLsbNum = MHW_COMMON_MI_GENERAL_SHIFT; + resourceParams.HwCommandType = MOS_MI_SEMAPHORE_WAIT; + + MHW_MI_CHK_STATUS(AddResourceToCmd( + this->m_osItf, + this->m_currentCmdBuf, + &resourceParams)); + } + else if(params.gpuVirtualAddress != 0) + { + cmd.DW2_3.SemaphoreAddress = (params.gpuVirtualAddress) >> MHW_COMMON_MI_GENERAL_SHIFT; + } + else + { + MHW_ASSERTMESSAGE("Invalid parameter, both resource and gpuva zero."); + return MOS_STATUS_INVALID_PARAMETER; + } cmd.DW0.MemoryType = IsGlobalGttInUse(); cmd.DW0.WaitMode = params.bPollingWaitMode; diff --git a/media_softlet/agnostic/common/hw/mhw_mi_itf.h b/media_softlet/agnostic/common/hw/mhw_mi_itf.h index 75c351c86d7..fabab6dc55d 100644 --- a/media_softlet/agnostic/common/hw/mhw_mi_itf.h +++ b/media_softlet/agnostic/common/hw/mhw_mi_itf.h @@ -112,6 +112,13 @@ class Itf virtual MOS_STATUS AddBLTMMIOPrologCmd(PMOS_COMMAND_BUFFER cmdBuffer) = 0; + virtual MOS_STATUS AddWaitInSyncBatchBuffer( + 
uint64_t fenceTokenValue, + uint64_t gpuVirtualAddress, + uint64_t waitValue, + MHW_BATCH_BUFFER &batchBuffer, + MHW_SEMAPHORE_WATI_REGISTERS &tokenRegister) = 0; + _MI_CMD_DEF(_MHW_CMD_ALL_DEF_FOR_ITF); MEDIA_CLASS_DEFINE_END(mhw__mi__Itf) }; diff --git a/media_softlet/agnostic/common/hw/mhw_mmio_common.h b/media_softlet/agnostic/common/hw/mhw_mmio_common.h index 5d024d4d859..df0df50ad7c 100644 --- a/media_softlet/agnostic/common/hw/mhw_mmio_common.h +++ b/media_softlet/agnostic/common/hw/mhw_mmio_common.h @@ -64,5 +64,27 @@ static constexpr uint32_t WATCHDOG_COUNT_THRESTHOLD_OFFSET_VCS1 static constexpr uint32_t WATCHDOG_COUNT_CTRL_OFFSET_VECS = 0x1C8178; static constexpr uint32_t WATCHDOG_COUNT_THRESTHOLD_OFFSET_VECS = 0x1C817C; +//Semaphore Token +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_RCS = 0x022b4; +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_CCS0 = 0x1A2B4; +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_BCS0 = 0x222B4; +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_TEE = 0x11A2B4; +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_VCS0 = 0x1C02B4; +static constexpr uint32_t MMIO_SEMAPHORE_TOKEN_VECS0 = 0x1C82B4; +//CS_GPR_CCS +static constexpr uint32_t CCS_GP_REGISTER0_LO_OFFSET = 0x1A600; +static constexpr uint32_t CCS_GP_REGISTER0_HI_OFFSET = 0x1A604; + +//CS_GPR_BCS +static constexpr uint32_t BCS_GP_REGISTER0_LO_OFFSET = 0x22600; +static constexpr uint32_t BCS_GP_REGISTER0_HI_OFFSET = 0x22604; + +//CS_GPR_TEE +static constexpr uint32_t TEE_GP_REGISTER0_LO_OFFSET = 0x11A600; +static constexpr uint32_t TEE_GP_REGISTER0_HI_OFFSET = 0x11A604; + +//CS_GPR_VCS +static constexpr uint32_t VCS_GP_REGISTER0_LO_OFFSET = 0x1C0600; +static constexpr uint32_t VCS_GP_REGISTER0_HI_OFFSET = 0x1C0604; #endif //__MHW_MMIO_COMMON_H__ diff --git a/media_softlet/agnostic/common/hw/mhw_utilities_next.cpp b/media_softlet/agnostic/common/hw/mhw_utilities_next.cpp index 4b8f5318e98..502c3e56eed 100644 --- a/media_softlet/agnostic/common/hw/mhw_utilities_next.cpp +++ 
b/media_softlet/agnostic/common/hw/mhw_utilities_next.cpp @@ -475,10 +475,15 @@ MOS_STATUS Mhw_SendGenericPrologCmdNext( MHW_CHK_NULL_RETURN(pSkuTable); pWaTable = pOsInterface->pfnGetWaTable(pOsInterface); MHW_CHK_NULL_RETURN(pWaTable); - GpuContext = pOsInterface->pfnGetGpuContext(pOsInterface); - + if (pOsInterface->pfnIsGpuSyncByCmd(pOsInterface) && pCmdBuffer->syncMhwBatchBuffer != nullptr) // Some gpu context may not support sync with batch buffer + { + //Reset params + auto &miBatchBufferStartParams = miItf->MHW_GETPAR_F(MI_BATCH_BUFFER_START)(); + miBatchBufferStartParams = {}; + MHW_CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(pCmdBuffer, pCmdBuffer->syncMhwBatchBuffer)); + } if ( pOsInterface->Component != COMPONENT_CM ) { if ( GpuContext == MOS_GPU_CONTEXT_RENDER || diff --git a/media_softlet/agnostic/common/hw/mhw_utilities_next.h b/media_softlet/agnostic/common/hw/mhw_utilities_next.h index f5fe687377e..797e41a150b 100644 --- a/media_softlet/agnostic/common/hw/mhw_utilities_next.h +++ b/media_softlet/agnostic/common/hw/mhw_utilities_next.h @@ -796,4 +796,11 @@ static __inline MOS_STATUS Mhw_AddCommandCmdOrBB( } } +struct MHW_SEMAPHORE_WATI_REGISTERS +{ + uint32_t m_tokenRegister = 0; + uint32_t m_gpr0Lo = 0; + uint32_t m_gpr0Hi = 0; + bool m_bMMIORemap = 0; +}; #endif // __MHW_UTILITIES_NEXT_H__ diff --git a/media_softlet/agnostic/common/media_interfaces/media_interfaces_mhw_next.h b/media_softlet/agnostic/common/media_interfaces/media_interfaces_mhw_next.h index a5a9ea35865..d9970c3a772 100644 --- a/media_softlet/agnostic/common/media_interfaces/media_interfaces_mhw_next.h +++ b/media_softlet/agnostic/common/media_interfaces/media_interfaces_mhw_next.h @@ -95,6 +95,7 @@ class MhwInterfacesNext uint8_t m_heapMode = 0; //!< To be deprecated when heap management unified bool m_isDecode = false; //!< Whether or not decode is in use, only valid for VDBOX creation bool m_isCp = false; //!< Whether or not CP is in use, CP only need mi and cp 
interface. + bool m_isMos = false; //!< Create it for mos, for example hws . }; /* Below legacy interfaces are kept temporarily for backward compatibility */ diff --git a/media_softlet/agnostic/common/os/mos_interface.h b/media_softlet/agnostic/common/os/mos_interface.h index 80efdf892e3..7db82be8684 100644 --- a/media_softlet/agnostic/common/os/mos_interface.h +++ b/media_softlet/agnostic/common/os/mos_interface.h @@ -2375,6 +2375,7 @@ class MosInterface static void SetIsTrinityEnabled(bool bTrinity); + static bool IsGpuSyncByCmd(MOS_STREAM_HANDLE streamState); private: //! //! \brief Init per stream parameters diff --git a/media_softlet/linux/common/os/mos_interface.cpp b/media_softlet/linux/common/os/mos_interface.cpp index 53e00b895dd..c8ec8815ea1 100644 --- a/media_softlet/linux/common/os/mos_interface.cpp +++ b/media_softlet/linux/common/os/mos_interface.cpp @@ -4127,4 +4127,8 @@ bool MosInterface::m_bTrinity = false; void MosInterface::SetIsTrinityEnabled(bool bTrinity) { return; +} +bool MosInterface::IsGpuSyncByCmd(MOS_STREAM_HANDLE streamState) +{ + return false; } \ No newline at end of file diff --git a/media_softlet/linux/common/os/private/mos_os_specific.cpp b/media_softlet/linux/common/os/private/mos_os_specific.cpp index 54091ffbfcc..97e24a730bd 100644 --- a/media_softlet/linux/common/os/private/mos_os_specific.cpp +++ b/media_softlet/linux/common/os/private/mos_os_specific.cpp @@ -3388,6 +3388,12 @@ bool Mos_Specific_IsAsyncDevice(PMOS_INTERFACE osInterface) return false; } +bool Mos_Specific_IsGpuSyncByCmd( + PMOS_INTERFACE osInterface) +{ + return false; +} + MOS_STATUS Mos_Specific_LoadFunction( PMOS_INTERFACE osInterface) {