diff --git a/runtime/oti/j9consts.h b/runtime/oti/j9consts.h index 24b372f738f..02f51a34160 100644 --- a/runtime/oti/j9consts.h +++ b/runtime/oti/j9consts.h @@ -932,6 +932,8 @@ extern "C" { #define J9JFR_EVENT_TYPE_THREAD_END 2 #define J9JFR_EVENT_TYPE_THREAD_SLEEP 3 #define J9JFR_EVENT_TYPE_OBJECT_WAIT 4 +#define J9JFR_EVENT_TYPE_CPU_LOAD 5 +#define J9JFR_EVENT_TYPE_THREAD_CPU_LOAD 6 /* JFR thread states */ diff --git a/runtime/oti/j9nonbuilder.h b/runtime/oti/j9nonbuilder.h index 4c9d2eb008b..f7597f88f70 100644 --- a/runtime/oti/j9nonbuilder.h +++ b/runtime/oti/j9nonbuilder.h @@ -345,6 +345,11 @@ struct J9VMContinuation; #if defined(J9VM_OPT_JFR) +typedef struct J9VMThreadJFRState { + omrthread_thread_time_t prevThreadCPUTimes; + int64_t prevTimestamp; +} ThreadJFRState; + typedef struct J9JFRBufferWalkState { U_8 *current; U_8 *end; @@ -414,6 +419,18 @@ typedef struct J9JFRMonitorWaited { } J9JFRMonitorWaited; #define J9JFRMonitorWaitedED_STACKTRACE(jfrEvent) ((UDATA*)(((J9JFRMonitorWaited*)(jfrEvent)) + 1)) +typedef struct J9JFRCPULoad { + J9JFR_EVENT_COMMON_FIELDS + float jvmUser; + float jvmSystem; + float machineTotal; +} J9JFRCPULoad; + +typedef struct J9JFRThreadCPULoad { + J9JFR_EVENT_COMMON_FIELDS + float user; + float system; +} J9JFRThreadCPULoad; #endif /* defined(J9VM_OPT_JFR) */ @@ -5593,6 +5610,9 @@ typedef struct J9VMThread { j9object_t closeScopeObj; #endif /* JAVA_SPEC_VERSION >= 22 */ UDATA unsafeIndexableHeaderSize; +#if defined(J9VM_OPT_JFR) + ThreadJFRState threadJfrState; +#endif /* defined(J9VM_OPT_JFR) */ } J9VMThread; #define J9VMTHREAD_ALIGNMENT 0x100 @@ -5671,6 +5691,9 @@ typedef struct JFRState { BOOLEAN isConstantEventsInitialized; BOOLEAN isStarted; omrthread_monitor_t isConstantEventsInitializedMutex; + J9SysinfoCPUTime prevSysCPUTime; + omrthread_process_time_t prevProcCPUTimes; + int64_t prevProcTimestamp; } JFRState; typedef struct J9ReflectFunctionTable { @@ -6210,6 +6233,7 @@ typedef struct J9JavaVM { omrthread_t jfrSamplerThread; UDATA jfrSamplerState; IDATA jfrAsyncKey; + IDATA jfrThreadCPULoadAsyncKey; #endif /* defined(J9VM_OPT_JFR) */ #if JAVA_SPEC_VERSION >= 22 omrthread_monitor_t closeScopeMutex; diff --git a/runtime/vm/BufferWriter.hpp b/runtime/vm/BufferWriter.hpp index 990774985c5..954cab5cefe 100644 --- a/runtime/vm/BufferWriter.hpp +++ b/runtime/vm/BufferWriter.hpp @@ -293,6 +293,13 @@ class VM_BufferWriter { writeU8(((newVal >> 21) & 0x7F)); } + void + writeFloat(float val) + { + U_32 newVal = *(U_32 *)&val; + writeU32(newVal); + } + static U_32 convertFromLEB128ToU32(U_8 *start) { diff --git a/runtime/vm/JFRChunkWriter.hpp b/runtime/vm/JFRChunkWriter.hpp index e5e73b19af4..c6ab312d500 100644 --- a/runtime/vm/JFRChunkWriter.hpp +++ b/runtime/vm/JFRChunkWriter.hpp @@ -71,6 +71,8 @@ enum MetadataTypeID { VirtualizationInformationID = 88, InitialSystemPropertyID = 89, CPUInformationID = 92, + CPULoadID = 94, + ThreadCPULoadID = 95, PhysicalMemoryID = 107, ExecutionSampleID = 108, ThreadID = 163, @@ -156,6 +158,8 @@ class VM_JFRChunkWriter { static constexpr int CPU_INFORMATION_EVENT_SIZE = 600; static constexpr int OS_INFORMATION_EVENT_SIZE = 100; static constexpr int INITIAL_SYSTEM_PROPERTY_EVENT_SIZE = 6000; + static constexpr int CPU_LOAD_EVENT_SIZE = (3 * sizeof(float)) + (3 * sizeof(I_64)); + static constexpr int THREAD_CPU_LOAD_EVENT_SIZE = (2 * sizeof(float)) + (4 * sizeof(I_64)); static constexpr int METADATA_ID = 1; @@ -325,6 +329,9 @@ class VM_JFRChunkWriter { pool_do(_constantPoolTypes.getThreadSleepTable(), &writeThreadSleepEvent, _bufferWriter); pool_do(_constantPoolTypes.getMonitorWaitTable(), &writeMonitorWaitEvent, _bufferWriter); + pool_do(_constantPoolTypes.getCPULoadTable(), &writeCPULoadEvent, _bufferWriter); + + pool_do(_constantPoolTypes.getThreadCPULoadTable(), &writeThreadCPULoadEvent, _bufferWriter); /* Only write constant events in first chunk */ if (0 == _vm->jfrState.jfrChunkCount) { @@ -525,6 +532,61 @@ class VM_JFRChunkWriter { _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); } + static void + writeCPULoadEvent(void *anElement, void *userData) + { + CPULoadEntry *entry = (CPULoadEntry *)anElement; + VM_BufferWriter *_bufferWriter = (VM_BufferWriter *)userData; + + /* reserve size field */ + U_8 *dataStart = _bufferWriter->getAndIncCursor(sizeof(U_32)); + + _bufferWriter->writeLEB128(CPULoadID); + + /* write start time */ + _bufferWriter->writeLEB128(entry->ticks); + + /* write user CPU load */ + _bufferWriter->writeFloat(entry->jvmUser); + + /* write system CPU load */ + _bufferWriter->writeFloat(entry->jvmSystem); + + /* write machine total CPU load */ + _bufferWriter->writeFloat(entry->machineTotal); + + /* write size */ + _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); + } + + static void + writeThreadCPULoadEvent(void *anElement, void *userData) + { + ThreadCPULoadEntry *entry = (ThreadCPULoadEntry *)anElement; + VM_BufferWriter *_bufferWriter = (VM_BufferWriter *)userData; + + /* reserve size field */ + U_8 *dataStart = _bufferWriter->getAndIncCursor(sizeof(U_32)); + + /* write event type */ + _bufferWriter->writeLEB128(ThreadCPULoadID); + + /* write start time */ + _bufferWriter->writeLEB128(entry->ticks); + + /* write thread index */ + _bufferWriter->writeLEB128(entry->threadIndex); + + /* write user thread CPU load */ + _bufferWriter->writeFloat(entry->user); + + /* write system thread CPU load */ + _bufferWriter->writeFloat(entry->system); + + /* write size */ + _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); + } + void writeJFRChunkToFile() { @@ -638,6 +700,11 @@ class VM_JFRChunkWriter { requireBufferSize += CPU_INFORMATION_EVENT_SIZE; requireBufferSize += INITIAL_SYSTEM_PROPERTY_EVENT_SIZE; + + requireBufferSize += _constantPoolTypes.getCPULoadCount() * CPU_LOAD_EVENT_SIZE; + + requireBufferSize += _constantPoolTypes.getThreadCPULoadCount() * THREAD_CPU_LOAD_EVENT_SIZE; + return requireBufferSize; } diff --git a/runtime/vm/JFRConstantPoolTypes.cpp b/runtime/vm/JFRConstantPoolTypes.cpp index a1ecb03af47..2278179b2b6 100644 --- a/runtime/vm/JFRConstantPoolTypes.cpp +++ b/runtime/vm/JFRConstantPoolTypes.cpp @@ -1068,6 +1068,51 @@ VM_JFRConstantPoolTypes::addMonitorWaitEntry(J9JFRMonitorWaited* threadWaitData) return index; } +U_32 +VM_JFRConstantPoolTypes::addCPULoadEntry(J9JFRCPULoad *cpuLoadData) +{ + CPULoadEntry *entry = (CPULoadEntry *)pool_newElement(_cpuLoadTable); + U_32 index = U_32_MAX; + + if (NULL == entry) { + _buildResult = OutOfMemory; + goto done; + } + + entry->ticks = cpuLoadData->startTicks; + entry->jvmUser = cpuLoadData->jvmUser; + entry->jvmSystem = cpuLoadData->jvmSystem; + entry->machineTotal = cpuLoadData->machineTotal; + + index = _cpuLoadCount++; + +done: + return index; +} + +U_32 +VM_JFRConstantPoolTypes::addThreadCPULoadEntry(J9JFRThreadCPULoad *threadCPULoadData) +{ + ThreadCPULoadEntry *entry = (ThreadCPULoadEntry *)pool_newElement(_threadCPULoadTable); + U_32 index = U_32_MAX; + + if (NULL == entry) { + _buildResult = OutOfMemory; + goto done; + } + + entry->ticks = threadCPULoadData->startTicks; + entry->user = threadCPULoadData->user; + entry->system = threadCPULoadData->system; + + entry->threadIndex = addThreadEntry(threadCPULoadData->vmThread); + if (isResultNotOKay()) goto done; + + index = _threadCPULoadCount++; + +done: + return index; +} void VM_JFRConstantPoolTypes::printTables() diff --git a/runtime/vm/JFRConstantPoolTypes.hpp b/runtime/vm/JFRConstantPoolTypes.hpp index 11c13b18d2b..e300e30ed56 100644 --- a/runtime/vm/JFRConstantPoolTypes.hpp +++ b/runtime/vm/JFRConstantPoolTypes.hpp @@ -212,6 +212,20 @@ struct StackTraceEntry { StackTraceEntry *next; }; +struct CPULoadEntry { + I_64 ticks; + float jvmUser; + float jvmSystem; + float machineTotal; +}; + +struct ThreadCPULoadEntry { + I_64 ticks; + U_32 threadIndex; + float user; + float system; +}; + struct JVMInformationEntry { const char *jvmName; const char *jvmVersion; @@ -289,6 +303,10 @@ class VM_JFRConstantPoolTypes { UDATA _threadSleepCount; J9Pool *_monitorWaitTable; UDATA _monitorWaitCount; + J9Pool *_cpuLoadTable; + UDATA _cpuLoadCount; + J9Pool *_threadCPULoadTable; + UDATA _threadCPULoadCount; /* Processing buffers */ StackFrame *_currentStackFrameBuffer; @@ -532,6 +550,10 @@ class VM_JFRConstantPoolTypes { U_32 addMonitorWaitEntry(J9JFRMonitorWaited* threadWaitData); + U_32 addCPULoadEntry(J9JFRCPULoad *cpuLoadData); + + U_32 addThreadCPULoadEntry(J9JFRThreadCPULoad *threadCPULoadData); + J9Pool *getExecutionSampleTable() { return _executionSampleTable; @@ -557,6 +579,16 @@ class VM_JFRConstantPoolTypes { return _monitorWaitTable; } + J9Pool *getCPULoadTable() + { + return _cpuLoadTable; + } + + J9Pool *getThreadCPULoadTable() + { + return _threadCPULoadTable; + } + UDATA getExecutionSampleCount() { return _executionSampleCount; @@ -582,6 +614,16 @@ class VM_JFRConstantPoolTypes { return _monitorWaitCount; } + UDATA getCPULoadCount() + { + return _cpuLoadCount; + } + + UDATA getThreadCPULoadCount() + { + return _threadCPULoadCount; + } + ClassloaderEntry *getClassloaderEntry() { return _firstClassloaderEntry; @@ -723,6 +765,11 @@ class VM_JFRConstantPoolTypes { break; case J9JFR_EVENT_TYPE_OBJECT_WAIT: addMonitorWaitEntry((J9JFRMonitorWaited*) event); + case J9JFR_EVENT_TYPE_CPU_LOAD: + addCPULoadEntry((J9JFRCPULoad *)event); + break; + case J9JFR_EVENT_TYPE_THREAD_CPU_LOAD: + addThreadCPULoadEntry((J9JFRThreadCPULoad *)event); break; default: Assert_VM_unreachable(); @@ -1026,6 +1073,10 @@ class VM_JFRConstantPoolTypes { , _threadSleepCount(0) , _monitorWaitTable(NULL) , _monitorWaitCount(0) + , _cpuLoadTable(NULL) + , _cpuLoadCount(0) + , _threadCPULoadTable(NULL) + , _threadCPULoadCount(0) , _previousStackTraceEntry(NULL) , _firstStackTraceEntry(NULL) , _previousThreadEntry(NULL) @@ -1128,6 +1179,18 @@ class VM_JFRConstantPoolTypes { goto done; } + _cpuLoadTable = pool_new(sizeof(CPULoadEntry), 0, sizeof(U_64), 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(privatePortLibrary)); + if (NULL == _cpuLoadTable) { + _buildResult = OutOfMemory; + goto done; + } + + _threadCPULoadTable = pool_new(sizeof(ThreadCPULoadEntry), 0, sizeof(U_64), 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(privatePortLibrary)); + if (NULL == _threadCPULoadTable) { + _buildResult = OutOfMemory; + goto done; + } + /* Add reserved index for default entries. For strings zero is the empty or NUll string. * For package zero is the deafult package, for Module zero is the unnamed module. ThreadGroup * zero is NULL threadGroup. @@ -1210,6 +1273,8 @@ class VM_JFRConstantPoolTypes { pool_kill(_threadStartTable); pool_kill(_threadEndTable); pool_kill(_threadSleepTable); + pool_kill(_cpuLoadTable); + pool_kill(_threadCPULoadTable); j9mem_free_memory(_globalStringTable); } diff --git a/runtime/vm/jfr.cpp b/runtime/vm/jfr.cpp index d23242d8578..132dc2bc7df 100644 --- a/runtime/vm/jfr.cpp +++ b/runtime/vm/jfr.cpp @@ -22,6 +22,7 @@ #include "JFRConstantPoolTypes.hpp" #include "j9protos.h" #include "omrlinkedlist.h" +#include "thread_api.h" #include "ut_j9vm.h" #include "vm_internal.h" @@ -56,6 +57,7 @@ static void jfrStartSamplingThread(J9JavaVM *vm); static void initializeEventFields(J9VMThread *currentThread, J9JFREvent *jfrEvent, UDATA eventType); static int J9THREAD_PROC jfrSamplingThreadProc(void *entryArg); static void jfrExecutionSampleCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData); +static void jfrThreadCPULoadCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData); /** * Calculate the size in bytes of a JFR event. @@ -83,6 +85,11 @@ jfrEventSize(J9JFREvent *jfrEvent) break; case J9JFR_EVENT_TYPE_OBJECT_WAIT: size = sizeof(J9JFRMonitorWaited) + (((J9JFRMonitorWaited*)jfrEvent)->stackTraceSize * sizeof(UDATA)); + case J9JFR_EVENT_TYPE_CPU_LOAD: + size = sizeof(J9JFRCPULoad); + break; + case J9JFR_EVENT_TYPE_THREAD_CPU_LOAD: + size = sizeof(J9JFRThreadCPULoad); break; default: Assert_VM_unreachable(); @@ -601,6 +608,11 @@ initializeJFR(J9JavaVM *vm, BOOLEAN lateInit) goto fail; } + vm->jfrThreadCPULoadAsyncKey = J9RegisterAsyncEvent(vm, jfrThreadCPULoadCallback, NULL); + if (vm->jfrThreadCPULoadAsyncKey < 0) { + goto fail; + } + if ((*vmHooks)->J9HookRegisterWithCallSite(vmHooks, J9HOOK_VM_THREAD_CREATED, jfrThreadCreated, OMR_GET_CALLSITE(), NULL)) { goto fail; } @@ -656,6 +668,8 @@ initializeJFR(J9JavaVM *vm, BOOLEAN lateInit) goto fail; } + vm->jfrState.prevSysCPUTime.timestamp = -1; + vm->jfrState.prevProcTimestamp = -1; if (omrthread_monitor_init_with_name(&vm->jfrBufferMutex, 0, "JFR global buffer mutex")) { goto fail; } @@ -759,6 +773,11 @@ tearDownJFR(J9JavaVM *vm) J9UnregisterAsyncEvent(vm, vm->jfrAsyncKey); vm->jfrAsyncKey = -1; } + if (vm->jfrThreadCPULoadAsyncKey >= 0) { + J9UnregisterAsyncEvent(vm, vm->jfrThreadCPULoadAsyncKey); + vm->jfrThreadCPULoadAsyncKey = -1; + } + } /** @@ -804,6 +823,91 @@ jfrExecutionSampleCallback(J9VMThread *currentThread, IDATA handlerKey, void *us jfrExecutionSample(currentThread, currentThread); } +void +jfrCPULoad(J9VMThread *currentThread) +{ + PORT_ACCESS_FROM_VMC(currentThread); + OMRPORT_ACCESS_FROM_J9PORT(PORTLIB); + + omrthread_process_time_t currentProcCPUTime = {0}; + intptr_t processTimeRC = omrthread_get_process_times(¤tProcCPUTime); + + J9SysinfoCPUTime currentSysCPUTime = {0}; + intptr_t sysTimeRC = omrsysinfo_get_CPU_utilization(¤tSysCPUTime); + + if (0 == processTimeRC && 0 == sysTimeRC) { + J9JFRCPULoad *jfrEvent = (J9JFRCPULoad *)reserveBuffer(currentThread, sizeof(J9JFRCPULoad)); + if (NULL != jfrEvent) { + initializeEventFields(currentThread, (J9JFREvent *)jfrEvent, J9JFR_EVENT_TYPE_CPU_LOAD); + + JFRState *jfrState = ¤tThread->javaVM->jfrState; + uintptr_t numberOfCpus = j9sysinfo_get_number_CPUs_by_type(J9PORT_CPU_ONLINE); + int64_t currentTime = j9time_nano_time(); + omrthread_process_time_t *prevProcCPUTimes = &jfrState->prevProcCPUTimes; + int64_t *prevProcTimestamp = &jfrState->prevProcTimestamp; + + if (-1 == *prevProcTimestamp) { + jfrEvent->jvmUser = 0; + jfrEvent->jvmSystem = 0; + } else { + int64_t timeDelta = currentTime - *prevProcTimestamp; + jfrEvent->jvmUser = OMR_MIN((currentProcCPUTime._userTime - prevProcCPUTimes->_userTime) / ((double)numberOfCpus * timeDelta), 1.0); + jfrEvent->jvmSystem = OMR_MIN((currentProcCPUTime._systemTime - prevProcCPUTimes->_systemTime) / ((double)numberOfCpus * timeDelta), 1.0); + } + *prevProcCPUTimes = currentProcCPUTime; + *prevProcTimestamp = currentTime; + + J9SysinfoCPUTime *prevSysCPUTime = &jfrState->prevSysCPUTime; + if (-1 == prevSysCPUTime->timestamp) { + jfrEvent->machineTotal = 0; + } else { + jfrEvent->machineTotal = OMR_MIN((currentSysCPUTime.cpuTime - prevSysCPUTime->cpuTime) / ((double)numberOfCpus * (currentSysCPUTime.timestamp - prevSysCPUTime->timestamp)), 1.0); + } + *prevSysCPUTime = currentSysCPUTime; + } + } + +} + +void +jfrThreadCPULoad(J9VMThread *currentThread, J9VMThread *sampleThread) +{ + PORT_ACCESS_FROM_VMC(currentThread); + + omrthread_thread_time_t threadTime; + + intptr_t rc = omrthread_get_thread_times(&threadTime); + + if (-1 != rc) { + J9JFRThreadCPULoad *jfrEvent = (J9JFRThreadCPULoad*)reserveBuffer(currentThread, sizeof(*jfrEvent)); + if (NULL != jfrEvent) { + initializeEventFields(currentThread, (J9JFREvent *)jfrEvent, J9JFR_EVENT_TYPE_THREAD_CPU_LOAD); + + ThreadJFRState* jfrState = &sampleThread->threadJfrState; + int64_t currentTime = j9time_nano_time(); + int64_t *prevTimestamp = &jfrState->prevTimestamp; + omrthread_thread_time_t *prevThreadCPUTimes = &jfrState->prevThreadCPUTimes; + + if (-1 == *prevTimestamp) { + jfrEvent->user = 0; + jfrEvent->system = 0; + } else { + int64_t timeDelta = currentTime - *prevTimestamp; + jfrEvent->user = OMR_MIN((threadTime.userTime - prevThreadCPUTimes->userTime) / ((double)timeDelta), 1.0); + jfrEvent->system = OMR_MIN((threadTime.sysTime - prevThreadCPUTimes->sysTime) / ((double)timeDelta), 1.0); + } + *prevTimestamp = currentTime; + *prevThreadCPUTimes = threadTime; + } + } +} + +static void +jfrThreadCPULoadCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData) +{ + jfrThreadCPULoad(currentThread, currentThread); +} + static int J9THREAD_PROC jfrSamplingThreadProc(void *entryArg) { @@ -814,8 +918,16 @@ jfrSamplingThreadProc(void *entryArg) omrthread_monitor_enter(vm->jfrSamplerMutex); vm->jfrSamplerState = J9JFR_SAMPLER_STATE_RUNNING; omrthread_monitor_notify_all(vm->jfrSamplerMutex); + UDATA count = 0; while (J9JFR_SAMPLER_STATE_STOP != vm->jfrSamplerState) { J9SignalAsyncEvent(vm, NULL, vm->jfrAsyncKey); + if (0 == (count % 100)) { // 1000ms + jfrCPULoad(currentThread); + } + if (0 == (count % 1000)) { // 10s + J9SignalAsyncEvent(vm, NULL, vm->jfrThreadCPULoadAsyncKey); + } + count += 1; omrthread_monitor_wait_timed(vm->jfrSamplerMutex, J9JFR_SAMPLING_RATE, 0); } omrthread_monitor_exit(vm->jfrSamplerMutex); diff --git a/runtime/vm/vmthread.cpp b/runtime/vm/vmthread.cpp index 3a019c0ac39..cd5966d47fd 100644 --- a/runtime/vm/vmthread.cpp +++ b/runtime/vm/vmthread.cpp @@ -297,6 +297,10 @@ allocateVMThread(J9JavaVM *vm, omrthread_t osThread, UDATA privateFlags, void *m newThread->scopedValueCache = NULL; #endif /* JAVA_SPEC_VERSION >= 19 */ +#if defined(J9VM_OPT_JFR) + newThread->threadJfrState.prevTimestamp = -1; +#endif /* defined(J9VM_OPT_JFR) */ + /* If an exclusive access request is in progress, mark this thread */ omrthread_monitor_enter(vm->exclusiveAccessMutex);