Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New ClientErrors monitor and Monitor parameter support #416

Merged
merged 1 commit into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* Improved the handling of track log timestamps - these can now be supplied by the client and are no
longer bound to insert time of DB record. Latest Hoist React uses *start* of the tracked activity.
* Support for persisting memory monitoring results
* New built-in monitor `xhClientErrorsMonitor`
* New methods `MonitorResult.getParam` and `MonitorResult.getRequiredParam`

### ⚙️ Technical

Expand Down
11 changes: 11 additions & 0 deletions src/main/groovy/io/xh/hoist/monitor/MonitorResult.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ class MonitorResult implements JSONFormat {
monitor.params ? JSONParser.parseObject(monitor.params) : [:]
}

/**
 * Read an optional parameter from this monitor's configured params.
 *
 * @param name - key of the parameter to read.
 * @param defaultVal - value to return when no entry exists for `name` (null if not supplied).
 * @return the configured value whenever the key is present (including an explicit null),
 *      otherwise `defaultVal`.
 */
<T> T getParam(String name, T defaultVal = null) {
    // Read `params` once: the getter appears to re-parse the monitor's JSON on every access,
    // so referencing it twice would do that work twice.
    Map configured = params
    configured.containsKey(name) ? configured[name] : defaultVal
}

/**
 * Read a mandatory parameter from this monitor's configured params.
 *
 * @param name - key of the parameter to read.
 * @return the configured value for `name`. Only the presence of the key is checked, so an
 *      explicitly configured null is returned as-is.
 * @throws RuntimeException if no entry exists for `name`.
 */
<T> T getRequiredParam(String name) {
    // Read `params` once: the getter appears to re-parse the monitor's JSON on every access.
    Map configured = params
    if (!configured.containsKey(name)) {
        throw new RuntimeException("Missing required parameter ${name}")
    }
    configured[name]
}

/** Combines the given string with 'message', separated by formatting */
void prependMessage(String prependStr) {
// Space character before the newlines is for fallback formatting in `hoist-react <= v51.0.0`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

package io.xh.hoist.monitor.provided

import grails.gorm.transactions.ReadOnly
import grails.gorm.transactions.Transactional
import groovy.sql.Sql
import io.xh.hoist.BaseService
import io.xh.hoist.data.filter.Filter
import io.xh.hoist.monitor.Monitor
import io.xh.hoist.monitor.MonitorResult
import io.xh.hoist.util.Utils
import io.xh.hoist.clienterror.ClientError
import io.xh.hoist.track.TrackLog

import static io.xh.hoist.monitor.MonitorStatus.FAIL
import static io.xh.hoist.monitor.MonitorStatus.INACTIVE
Expand Down Expand Up @@ -51,18 +53,14 @@ class DefaultMonitorDefinitionService extends BaseService {
return
}

def aggregate = result.params.aggregate ?: 'avg'
if (!['avg', 'max'].contains(aggregate)) {
throw new RuntimeException("Invalid aggregate parameter: ${result.params.aggregate}")
def aggregate = result.getParam('aggregate', 'avg')
if (!(aggregate in ['avg', 'max'])) {
throw new RuntimeException("Invalid aggregate parameter: $aggregate")
}

def lookbackMinutes = result.params.lookbackMinutes
if (!lookbackMinutes) {
throw new RuntimeException('No \"lookbackMinutes\" parameter provided')
}

def cutOffTime = currentTimeMillis() - lookbackMinutes * MINUTES
def snapshots = memoryMonitoringService.snapshots.findAll {it.key > cutOffTime}.values()
def lookback = result.getRequiredParam('lookbackMinutes') * MINUTES,
cutoffTime = currentTimeMillis() - lookback,
snapshots = memoryMonitoringService.snapshots.findAll {it.key > cutoffTime}.values()

if (!snapshots) {
result.metric = 0
Expand All @@ -74,50 +72,34 @@ class DefaultMonitorDefinitionService extends BaseService {
: snapshots.max{it.usedPctMax}.usedPctMax
}

/**
 * Monitor check that sets the result metric to the number of ClientError records created
 * after the start of the lookback window.
 *
 * Requires a `lookbackMinutes` param on the monitor - a missing param raises from
 * `getRequiredParam`.
 */
@ReadOnly
def xhClientErrorsMonitor(MonitorResult result) {
    def windowMillis = result.getRequiredParam('lookbackMinutes') * MINUTES
    def windowStart = new Date(currentTimeMillis() - windowMillis)

    result.metric = ClientError.countByDateCreatedGreaterThan(windowStart)
}

/**
 * Monitor check that reports the largest `elapsed` value among track logs created within the
 * last `lookbackMinutes` minutes, divided by SECONDS. Only logs with a non-null `elapsed` are
 * considered, and the metric is 0 when no such logs exist.
 *
 * Sets the result status to INACTIVE and returns early when `trackLogAdminService` is not
 * enabled. Requires a `lookbackMinutes` param on the monitor.
 */
@ReadOnly
def xhLoadTimeMonitor(MonitorResult result) {
if (!trackLogAdminService.enabled) {
result.status = INACTIVE
return
}

def lookbackMinutes = result.params.lookbackMinutes
if (!lookbackMinutes) {
throw new RuntimeException('No \"lookbackMinutes\" parameter provided.')
}

def cutOffTime = currentTimeMillis() - lookbackMinutes * MINUTES
def logs = trackLogAdminService.queryTrackLog(
Filter.parse([
filters: [
[
field: 'dateCreated',
op: '>',
value: new Date(cutOffTime)
],
[
field: 'elapsed',
op: '!=',
value: null
]
],
op: "AND"
])
)

if (!logs) {
result.metric = 0
return
}
def lookback = result.getRequiredParam('lookbackMinutes') * MINUTES,
cutoffDate = new Date(currentTimeMillis() - lookback),
logs = TrackLog.findAllByDateCreatedGreaterThanAndElapsedIsNotNull(cutoffDate)

result.metric = logs.max{it.elapsed}.elapsed / SECONDS
result.metric = logs ? logs.max{it.elapsed}.elapsed / SECONDS : 0
}

def xhDbConnectionMonitor(MonitorResult result) {
def startTime = currentTimeMillis()
Sql sql = new Sql(dataSource)
try {
// Support configurable table name for edge case where XH tables are in a custom schema.
def tableName = result.params.tableName ?: 'xh_monitor'
def tableName = result.getParam('tableName', 'xh_monitor')
sql.rows("SELECT * FROM ${Sql.expand(tableName)} WHERE code = 'xhDbConnectionMonitor'")
} finally {
sql.close()
Expand All @@ -132,12 +114,9 @@ class DefaultMonitorDefinitionService extends BaseService {
return
}

if (!result.params.queryUser) {
throw new RuntimeException("No \"queryUser\" parameter provided.")
}

def startTime = currentTimeMillis()
def user = ldapService.lookupUser(result.params.queryUser)
def queryUser = result.getRequiredParam('queryUser'),
user = ldapService.lookupUser(queryUser),
startTime = currentTimeMillis()

if (!user) {
result.message = "Failed to find expected user: ${result.params.queryUser}"
Expand Down Expand Up @@ -177,6 +156,18 @@ class DefaultMonitorDefinitionService extends BaseService {
+ 'Set "aggregate" to "avg" to report average heap usage (default).\n'
+ 'Set "aggregate" to "max" to report the largest heap usage.'
],
// Default spec for the client errors monitor. Its metric is a count of ClientError records,
// so the unit and notes describe errors rather than a duration.
[
    code: 'xhClientErrorsMonitor',
    name: 'Client Errors (Last 30m)',
    metricType: 'Ceil',
    metricUnit: 'errors',
    warnThreshold: 1,
    failThreshold: 10,
    active: true,
    primaryOnly: true,
    params: '{\n\t"lookbackMinutes": 30\n}',
    notes: 'Reports the number of client errors in the last {lookbackMinutes} minutes.'
],
[
code: 'xhLoadTimeMonitor',
name: 'Max Load Time (Last 30m)',
Expand Down
Loading