From 0f4b55636bc88672f724993cb890c457243cdbeb Mon Sep 17 00:00:00 2001 From: lbwexler Date: Sun, 13 Oct 2024 22:58:19 -0400 Subject: [PATCH 1/2] Persist MemoryMonitoring for defunct instances --- CHANGELOG.md | 3 + .../MemoryMonitorAdminController.groovy | 11 ++- grails-app/init/io/xh/hoist/BootStrap.groovy | 1 + .../admin/MemoryMonitoringService.groovy | 72 ++++++++++++++++++- 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adfbb8cc..6624168a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,10 @@ ## 24.0-SNAPSHOT - unreleased +### 💥 Breaking Changes (upgrade difficulty: 🟢 LOW - requires Hoist React v69) + * Support bulk tracking messages. Improve timestamps on tracking messages +* Support for persisting of memory monitoring results ## 23.0.0 - 2024-09-27 diff --git a/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy b/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy index 0a2370e9..0378394e 100644 --- a/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy +++ b/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy @@ -16,6 +16,8 @@ import static io.xh.hoist.util.Utils.appContext @Access(['HOIST_ADMIN_READER']) class MemoryMonitorAdminController extends BaseController { + def memoryMonitoringService + def snapshots(String instance) { runOnInstance(new Snapshots(), instance) } @@ -46,7 +48,6 @@ class MemoryMonitorAdminController extends BaseController { } } - @Access(['HOIST_ADMIN']) def dumpHeap(String filename, String instance) { runOnInstance(new DumpHeap(filename: filename), instance) @@ -59,4 +60,12 @@ class MemoryMonitorAdminController extends BaseController { return [success: true] } } + + def availablePastInstances() { + renderJSON(memoryMonitoringService.availablePastInstances()) + } + + def snapshotsForPastInstance(String instance) { + renderJSON(memoryMonitoringService.snapshotsForPastInstance(instance)) + } } \ No newline at end of file diff --git a/grails-app/init/io/xh/hoist/BootStrap.groovy b/grails-app/init/io/xh/hoist/BootStrap.groovy index f19e3453..6dce564c 100644 --- a/grails-app/init/io/xh/hoist/BootStrap.groovy +++ b/grails-app/init/io/xh/hoist/BootStrap.groovy @@ -266,6 +266,7 @@ class BootStrap implements LogSupport { valueType: 'json', defaultValue: [ enabled: true, + persistHours: 24, snapshotInterval: 60, maxSnapshots: 1440, heapDumpDir: null diff --git a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy index 75abb4a8..a97ff8e7 100644 --- a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy @@ -8,6 +8,7 @@ package io.xh.hoist.admin import com.sun.management.HotSpotDiagnosticMXBean +import grails.gorm.transactions.Transactional import io.xh.hoist.BaseService import io.xh.hoist.util.DateTimeUtils @@ -15,11 +16,16 @@ import java.lang.management.GarbageCollectorMXBean import java.lang.management.ManagementFactory import java.util.concurrent.ConcurrentHashMap +import static io.xh.hoist.json.JSONParser.parseObject import static io.xh.hoist.util.DateTimeUtils.intervalElapsed +import static io.xh.hoist.util.Utils.getAppEnvironment +import static io.xh.hoist.util.Utils.isProduction import static io.xh.hoist.util.Utils.startupTime +import static io.xh.hoist.util.DateTimeUtils.HOURS import static java.lang.Runtime.getRuntime import static java.lang.System.currentTimeMillis + /** * Service to sample and return simple statistics on heap (memory) usage from the JVM runtime. * Collects rolling history of snapshots on a configurable timer. @@ -27,9 +33,13 @@ import static java.lang.System.currentTimeMillis class MemoryMonitoringService extends BaseService { def configService + def jsonBlobService private Map _snapshots = new ConcurrentHashMap() private Date _lastInfoLogged + private final String blobOwner = 'xhMemoryMonitoringService' + private final static String blobType = isProduction ? 'xhMemorySnapshots' : "xhMemorySnapshots_$appEnvironment" + private String blobToken void init() { createTimer( @@ -37,6 +47,14 @@ class MemoryMonitoringService extends BaseService { runFn: this.&takeSnapshot, interval: {this.enabled ? config.snapshotInterval * DateTimeUtils.SECONDS: -1} ) + + createTimer( + name: 'cullPersisted', + runFn: this.&cullPersisted, + interval: {config.persistHours > 0 ? 1 * HOURS : -1}, + delay: true, + primaryOnly: true + ) } boolean getEnabled() { @@ -86,13 +104,15 @@ class MemoryMonitoringService extends BaseService { _snapshots.remove(oldest.key) } - if (intervalElapsed(1 * DateTimeUtils.HOURS, _lastInfoLogged)) { + if (intervalElapsed(1 * HOURS, _lastInfoLogged)) { logInfo(newSnap) _lastInfoLogged = new Date() } else { logDebug(newSnap) } + if (config.persistHours > 0) persistSnapshots() + return newSnap } @@ -108,6 +128,24 @@ class MemoryMonitoringService extends BaseService { ] } + /** + * Get list of past instances for which snapshots are available. + */ + List availablePastInstances() { + jsonBlobService + .list(blobType, blobOwner) + .findAll { !clusterService.isMember(it.name) } + .collect { [name: it.name, lastUpdated: it.lastUpdated] } + } + + /** + * Get snapshots for a past instance. + */ + Map snapshotsForPastInstance(String instanceName) { + def blob = jsonBlobService.list(blobType, blobOwner).find { it.name == instanceName } + blob ? parseObject(blob.value) : [:] + } + //------------------------ // Implementation //------------------------ @@ -169,6 +207,38 @@ class MemoryMonitoringService extends BaseService { return Math.round(v * 100) / 100 } + private void persistSnapshots() { + try { + if (blobToken) { + jsonBlobService.update(blobToken, [value: snapshots], blobOwner) + } else { + def blob = jsonBlobService.create([ + name : clusterService.instanceName, + type : blobType, + value: snapshots + ], blobOwner) + blobToken = blob.token + } + } catch (Exception e) { + logError('Failed to persist memory snapshots', e) + blobToken = null + } + } + + @Transactional + private cullPersisted() { + def persistMs = config.persistHours ? config.persistHours * HOURS : null, + toDelete = jsonBlobService + .list(blobType, blobOwner) + .findAll { !persistMs || intervalElapsed(persistMs, it.lastUpdated) } + + if (toDelete) { + withInfo(['Deleting expired memory snapshots', [count: toDelete.size()]]) { + toDelete.each { it.delete() } + } + } + } + void clearCaches() { _snapshots.clear() super.clearCaches() From 075508ff5422bcfea7b4c9cbbc62e5242131236b Mon Sep 17 00:00:00 2001 From: lbwexler Date: Mon, 14 Oct 2024 22:42:48 -0400 Subject: [PATCH 2/2] Persist count of snapshots, not a particular time period --- grails-app/init/io/xh/hoist/BootStrap.groovy | 5 +++-- .../hoist/admin/MemoryMonitoringService.groovy | 17 +++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/grails-app/init/io/xh/hoist/BootStrap.groovy b/grails-app/init/io/xh/hoist/BootStrap.groovy index 6dce564c..2156bc1e 100644 --- a/grails-app/init/io/xh/hoist/BootStrap.groovy +++ b/grails-app/init/io/xh/hoist/BootStrap.groovy @@ -266,10 +266,11 @@ class BootStrap implements LogSupport { valueType: 'json', defaultValue: [ enabled: true, - persistHours: 24, snapshotInterval: 60, maxSnapshots: 1440, - heapDumpDir: null + heapDumpDir: null, + preservePastInstances: true, + maxPastInstances: 10 ], clientVisible: true, groupName: 'xh.io', diff --git a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy index a97ff8e7..6ba54e9d 100644 --- a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy @@ -17,6 +17,7 @@ import java.lang.management.ManagementFactory import java.util.concurrent.ConcurrentHashMap import static io.xh.hoist.json.JSONParser.parseObject +import static io.xh.hoist.util.DateTimeUtils.MINUTES import static io.xh.hoist.util.DateTimeUtils.intervalElapsed import static io.xh.hoist.util.Utils.getAppEnvironment import static io.xh.hoist.util.Utils.isProduction @@ -51,8 +52,8 @@ class MemoryMonitoringService extends BaseService { createTimer( name: 'cullPersisted', runFn: this.&cullPersisted, - interval: {config.persistHours > 0 ? 1 * HOURS : -1}, - delay: true, + interval: 1 * HOURS, + delay: 5 * MINUTES, primaryOnly: true ) } @@ -111,7 +112,7 @@ class MemoryMonitoringService extends BaseService { logDebug(newSnap) } - if (config.persistHours > 0) persistSnapshots() + if (config.preservePastInstances) persistSnapshots() return newSnap } @@ -132,6 +133,7 @@ class MemoryMonitoringService extends BaseService { * Get list of past instances for which snapshots are available. */ List availablePastInstances() { + if (!config.preservePastInstances) return [] jsonBlobService .list(blobType, blobOwner) .findAll { !clusterService.isMember(it.name) } @@ -227,13 +229,12 @@ class MemoryMonitoringService extends BaseService { @Transactional private cullPersisted() { - def persistMs = config.persistHours ? config.persistHours * HOURS : null, - toDelete = jsonBlobService - .list(blobType, blobOwner) - .findAll { !persistMs || intervalElapsed(persistMs, it.lastUpdated) } + def all = jsonBlobService.list(blobType, blobOwner).sort { it.lastUpdated }, + maxKeep = config.maxPastInstances != null ? Math.max(config.maxPastInstances, 0) : 5, + toDelete = all.dropRight(maxKeep) if (toDelete) { - withInfo(['Deleting expired memory snapshots', [count: toDelete.size()]]) { + withInfo(['Deleting memory snapshots', [count: toDelete.size()]]) { toDelete.each { it.delete() } } }