Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persist MemoryMonitoring for defunct instances #413

Merged
merged 3 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

## 24.0-SNAPSHOT - unreleased

### 💥 Breaking Changes (upgrade difficulty: 🟢 LOW - requires Hoist React v69)

* Support bulk tracking messages. Improve timestamps on tracking messages.
* Support persisting memory monitoring results.

## 23.0.0 - 2024-09-27

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import static io.xh.hoist.util.Utils.appContext
@Access(['HOIST_ADMIN_READER'])
class MemoryMonitorAdminController extends BaseController {

def memoryMonitoringService

/**
 * Build a new `Snapshots` task and hand it, together with the requested instance,
 * to `runOnInstance`.
 *
 * @param instance instance identifier, passed through to `runOnInstance` unchanged.
 */
def snapshots(String instance) {
    def task = new Snapshots()
    return runOnInstance(task, instance)
}
Expand Down Expand Up @@ -46,7 +48,6 @@ class MemoryMonitorAdminController extends BaseController {
}
}


@Access(['HOIST_ADMIN'])
def dumpHeap(String filename, String instance) {
runOnInstance(new DumpHeap(filename: filename), instance)
Expand All @@ -59,4 +60,12 @@ class MemoryMonitorAdminController extends BaseController {
return [success: true]
}
}

/**
 * Render, as JSON, whatever `memoryMonitoringService.availablePastInstances()` returns.
 */
def availablePastInstances() {
    def pastInstances = memoryMonitoringService.availablePastInstances()
    renderJSON(pastInstances)
}

/**
 * Render, as JSON, the persisted snapshots that `memoryMonitoringService` returns
 * for the given instance.
 *
 * @param instance name of the instance whose persisted snapshots are requested.
 */
def snapshotsForPastInstance(String instance) {
    def persisted = memoryMonitoringService.snapshotsForPastInstance(instance)
    renderJSON(persisted)
}
}
1 change: 1 addition & 0 deletions grails-app/init/io/xh/hoist/BootStrap.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ class BootStrap implements LogSupport {
valueType: 'json',
defaultValue: [
enabled: true,
persistHours: 24,
snapshotInterval: 60,
maxSnapshots: 1440,
heapDumpDir: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,53 @@
package io.xh.hoist.admin

import com.sun.management.HotSpotDiagnosticMXBean
import grails.gorm.transactions.Transactional
import io.xh.hoist.BaseService
import io.xh.hoist.util.DateTimeUtils

import java.lang.management.GarbageCollectorMXBean
import java.lang.management.ManagementFactory
import java.util.concurrent.ConcurrentHashMap

import static io.xh.hoist.json.JSONParser.parseObject
import static io.xh.hoist.util.DateTimeUtils.intervalElapsed
import static io.xh.hoist.util.Utils.getAppEnvironment
import static io.xh.hoist.util.Utils.isProduction
import static io.xh.hoist.util.Utils.startupTime
import static io.xh.hoist.util.DateTimeUtils.HOURS
import static java.lang.Runtime.getRuntime
import static java.lang.System.currentTimeMillis


/**
* Service to sample and return simple statistics on heap (memory) usage from the JVM runtime.
* Collects rolling history of snapshots on a configurable timer.
*/
class MemoryMonitoringService extends BaseService {

// Collaborating services. NOTE(review): presumably injected by the framework — wiring is not shown here.
def configService
def jsonBlobService

// In-memory snapshot history for this instance.
// NOTE(review): the Long keys look like snapshot timestamps — confirm against takeSnapshot().
private Map<Long, Map> _snapshots = new ConcurrentHashMap()
// When a snapshot was last logged at info level; used to choose info vs. debug logging.
private Date _lastInfoLogged
// Owner value passed on every jsonBlobService call made by this service.
private final String blobOwner = 'xhMemoryMonitoringService'
// Blob type for persisted snapshots; outside production the app environment is appended.
private final static String blobType = isProduction ? 'xhMemorySnapshots' : "xhMemorySnapshots_$appEnvironment"
lbwexler marked this conversation as resolved.
Show resolved Hide resolved
private String blobToken

/**
 * Register the two timers this service relies on:
 *  - `takeSnapshot`, which calls takeSnapshot() on the configured snapshot interval.
 *  - `cullPersisted`, which calls cullPersisted() hourly while `persistHours` is positive.
 *
 * Both intervals are closures, and each yields -1 when its feature is switched off.
 * NOTE(review): presumably createTimer re-evaluates the closures and treats -1 as
 * "do not run" — confirm against createTimer.
 */
void init() {
    def snapshotInterval = { this.enabled ? config.snapshotInterval * DateTimeUtils.SECONDS : -1 }
    def cullInterval = { config.persistHours > 0 ? 1 * HOURS : -1 }

    createTimer(name: 'takeSnapshot', runFn: this.&takeSnapshot, interval: snapshotInterval)

    // NOTE(review): `delay` and `primaryOnly` semantics are defined by createTimer — not visible here.
    createTimer(
        name: 'cullPersisted',
        runFn: this.&cullPersisted,
        interval: cullInterval,
        delay: true,
        primaryOnly: true
    )
}

boolean getEnabled() {
Expand Down Expand Up @@ -86,13 +104,15 @@ class MemoryMonitoringService extends BaseService {
_snapshots.remove(oldest.key)
}

if (intervalElapsed(1 * DateTimeUtils.HOURS, _lastInfoLogged)) {
if (intervalElapsed(1 * HOURS, _lastInfoLogged)) {
logInfo(newSnap)
_lastInfoLogged = new Date()
} else {
logDebug(newSnap)
}

if (config.persistHours > 0) persistSnapshots()

return newSnap
}

Expand All @@ -108,6 +128,24 @@ class MemoryMonitoringService extends BaseService {
]
}

/**
 * List the persisted snapshot blobs whose name is not a current cluster member.
 *
 * @return one map per such blob, carrying its `name` and `lastUpdated` values.
 */
List<Map> availablePastInstances() {
    def persisted = jsonBlobService.list(blobType, blobOwner)
    def defunct = persisted.findAll { blob -> !clusterService.isMember(blob.name) }
    return defunct.collect { blob -> [name: blob.name, lastUpdated: blob.lastUpdated] }
}

/**
 * Fetch the persisted snapshots stored under the given instance name.
 *
 * @param instanceName blob name to look for among this service's persisted blobs.
 * @return the parsed blob value, or an empty map when no blob carries that name.
 */
Map snapshotsForPastInstance(String instanceName) {
    def match = jsonBlobService
        .list(blobType, blobOwner)
        .find { candidate -> candidate.name == instanceName }

    if (!match) return [:]
    return parseObject(match.value)
}

//------------------------
// Implementation
//------------------------
Expand Down Expand Up @@ -169,6 +207,38 @@ class MemoryMonitoringService extends BaseService {
return Math.round(v * 100) / 100
}

/**
 * Write the current value of `snapshots` to a JSON blob named after this instance.
 *
 * With no token held, a new blob is created and its token remembered; otherwise the
 * existing blob is updated. Any exception is logged and the token is cleared, so the
 * next call takes the create path again.
 */
private void persistSnapshots() {
    try {
        if (!blobToken) {
            def created = jsonBlobService.create(
                [name: clusterService.instanceName, type: blobType, value: snapshots],
                blobOwner
            )
            blobToken = created.token
            return
        }
        jsonBlobService.update(blobToken, [value: snapshots], blobOwner)
    } catch (Exception e) {
        logError('Failed to persist memory snapshots', e)
        blobToken = null
    }
}

/**
 * Delete persisted snapshot blobs that are no longer wanted.
 *
 * A blob is selected when `intervalElapsed(persistMs, blob.lastUpdated)` is true, where
 * `persistMs` is `config.persistHours * HOURS`. If `persistHours` is unset or zero,
 * `persistMs` is null and every blob of this type/owner is selected.
 * Nothing is logged or deleted when no blob is selected.
 */
@Transactional
private cullPersisted() {
    def persistMs = config.persistHours ? config.persistHours * HOURS : null,
        toDelete = jsonBlobService
            .list(blobType, blobOwner)
            .findAll { !persistMs || intervalElapsed(persistMs, it.lastUpdated) }

    if (toDelete) {
        withInfo(['Deleting expired memory snapshots', [count: toDelete.size()]]) {
            toDelete.each { it.delete() }
        }
    }
}

void clearCaches() {
_snapshots.clear()
super.clearCaches()
Expand Down
Loading