From 5db976027ee83805465ad57dfd8ba69ec15df0d3 Mon Sep 17 00:00:00 2001 From: Lee Wexler Date: Mon, 14 Oct 2024 22:47:52 -0400 Subject: [PATCH] Persist MemoryMonitoring for defunct instances (#413) * Persist MemoryMonitoring for defunct instances * Persist count of snapshots, not a particular time period --- CHANGELOG.md | 1 + .../MemoryMonitorAdminController.groovy | 11 ++- grails-app/init/io/xh/hoist/BootStrap.groovy | 4 +- .../admin/MemoryMonitoringService.groovy | 73 ++++++++++++++++++- 4 files changed, 86 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54c3636c..27901b36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ request, helping to reduce network overhead for chatty apps. * Improved the handling of track log timestamps - these can now be supplied by the client and are no longer bound to insert time of DB record. Latest Hoist React uses *start* of the tracked activity. +* Support for persisting of memory monitoring results ### ⚙️ Technical diff --git a/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy b/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy index 0a2370e9..0378394e 100644 --- a/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy +++ b/grails-app/controllers/io/xh/hoist/admin/cluster/MemoryMonitorAdminController.groovy @@ -16,6 +16,8 @@ import static io.xh.hoist.util.Utils.appContext @Access(['HOIST_ADMIN_READER']) class MemoryMonitorAdminController extends BaseController { + def memoryMonitoringService + def snapshots(String instance) { runOnInstance(new Snapshots(), instance) } @@ -46,7 +48,6 @@ class MemoryMonitorAdminController extends BaseController { } } - @Access(['HOIST_ADMIN']) def dumpHeap(String filename, String instance) { runOnInstance(new DumpHeap(filename: filename), instance) @@ -59,4 +60,12 @@ class MemoryMonitorAdminController extends BaseController { return [success: true] } } + + def availablePastInstances() { + renderJSON(memoryMonitoringService.availablePastInstances()) + } + + def snapshotsForPastInstance(String instance) { + renderJSON(memoryMonitoringService.snapshotsForPastInstance(instance)) + } } \ No newline at end of file diff --git a/grails-app/init/io/xh/hoist/BootStrap.groovy b/grails-app/init/io/xh/hoist/BootStrap.groovy index f19e3453..2156bc1e 100644 --- a/grails-app/init/io/xh/hoist/BootStrap.groovy +++ b/grails-app/init/io/xh/hoist/BootStrap.groovy @@ -268,7 +268,9 @@ class BootStrap implements LogSupport { enabled: true, snapshotInterval: 60, maxSnapshots: 1440, - heapDumpDir: null + heapDumpDir: null, + preservePastInstances: true, + maxPastInstances: 10 ], clientVisible: true, groupName: 'xh.io', diff --git a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy index 75abb4a8..6ba54e9d 100644 --- a/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy @@ -8,6 +8,7 @@ package io.xh.hoist.admin import com.sun.management.HotSpotDiagnosticMXBean +import grails.gorm.transactions.Transactional import io.xh.hoist.BaseService import io.xh.hoist.util.DateTimeUtils @@ -15,11 +16,17 @@ import java.lang.management.GarbageCollectorMXBean import java.lang.management.ManagementFactory import java.util.concurrent.ConcurrentHashMap +import static io.xh.hoist.json.JSONParser.parseObject +import static io.xh.hoist.util.DateTimeUtils.MINUTES import static io.xh.hoist.util.DateTimeUtils.intervalElapsed +import static io.xh.hoist.util.Utils.getAppEnvironment +import static io.xh.hoist.util.Utils.isProduction import static io.xh.hoist.util.Utils.startupTime +import static io.xh.hoist.util.DateTimeUtils.HOURS import static java.lang.Runtime.getRuntime import static java.lang.System.currentTimeMillis + /** * Service to sample and return simple statistics on heap (memory) usage from the JVM runtime. * Collects rolling history of snapshots on a configurable timer. @@ -27,9 +34,13 @@ import static java.lang.System.currentTimeMillis class MemoryMonitoringService extends BaseService { def configService + def jsonBlobService private Map _snapshots = new ConcurrentHashMap() private Date _lastInfoLogged + private final String blobOwner = 'xhMemoryMonitoringService' + private final static String blobType = isProduction ? 'xhMemorySnapshots' : "xhMemorySnapshots_$appEnvironment" + private String blobToken void init() { createTimer( @@ -37,6 +48,14 @@ class MemoryMonitoringService extends BaseService { runFn: this.&takeSnapshot, interval: {this.enabled ? config.snapshotInterval * DateTimeUtils.SECONDS: -1} ) + + createTimer( + name: 'cullPersisted', + runFn: this.&cullPersisted, + interval: 1 * HOURS, + delay: 5 * MINUTES, + primaryOnly: true + ) } boolean getEnabled() { @@ -86,13 +105,15 @@ class MemoryMonitoringService extends BaseService { _snapshots.remove(oldest.key) } - if (intervalElapsed(1 * DateTimeUtils.HOURS, _lastInfoLogged)) { + if (intervalElapsed(1 * HOURS, _lastInfoLogged)) { logInfo(newSnap) _lastInfoLogged = new Date() } else { logDebug(newSnap) } + if (config.preservePastInstances) persistSnapshots() + return newSnap } @@ -108,6 +129,25 @@ class MemoryMonitoringService extends BaseService { ] } + /** + * Get list of past instances for which snapshots are available. + */ + List availablePastInstances() { + if (!config.preservePastInstances) return [] + jsonBlobService + .list(blobType, blobOwner) + .findAll { !clusterService.isMember(it.name) } + .collect { [name: it.name, lastUpdated: it.lastUpdated] } + } + + /** + * Get snapshots for a past instance. + */ + Map snapshotsForPastInstance(String instanceName) { + def blob = jsonBlobService.list(blobType, blobOwner).find { it.name == instanceName } + blob ? parseObject(blob.value) : [:] + } + //------------------------ // Implementation //------------------------ @@ -169,6 +209,37 @@ class MemoryMonitoringService extends BaseService { return Math.round(v * 100) / 100 } + private void persistSnapshots() { + try { + if (blobToken) { + jsonBlobService.update(blobToken, [value: snapshots], blobOwner) + } else { + def blob = jsonBlobService.create([ + name : clusterService.instanceName, + type : blobType, + value: snapshots + ], blobOwner) + blobToken = blob.token + } + } catch (Exception e) { + logError('Failed to persist memory snapshots', e) + blobToken = null + } + } + + @Transactional + private cullPersisted() { + def all = jsonBlobService.list(blobType, blobOwner).sort { it.lastUpdated }, + maxKeep = config.maxPastInstances != null ? Math.max(config.maxPastInstances, 0) : 5, + toDelete = all.dropRight(maxKeep) + + if (toDelete) { + withInfo(['Deleting memory snapshots', [count: toDelete.size()]]) { + toDelete.each { it.delete() } + } + } + } + void clearCaches() { _snapshots.clear() super.clearCaches()