Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds metrics for task states #14785

Merged
merged 2 commits into from
Jan 22, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Adds tests
noob-se7en committed Jan 9, 2025
commit 0dd2c2fa9dbf0e28868fa2880398fadeeb73d59c
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ rules:
tableType: "$6"
partition: "$7"
# Gauges that accept the controller taskType
- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<type=\"ControllerMetrics\", name=\"pinot\\.controller\\.(numMinionTasksInProgress|numMinionSubtasksRunning|numMinionSubtasksWaiting|numMinionSubtasksError|numMinionSubtasksUnknown|percentMinionSubtasksInQueue|percentMinionSubtasksInError)\\.(\\w+)\"><>(\\w+)"
- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<type=\"ControllerMetrics\", name=\"pinot\\.controller\\.(numMinionTasksInProgress|numMinionSubtasksRunning|numMinionSubtasksWaiting|numMinionSubtasksError|numMinionSubtasksUnknown|numMinionSubtasksDropped|numMinionSubtasksTimedOut|numMinionSubtasksAborted|percentMinionSubtasksInQueue|percentMinionSubtasksInError)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_$1_$3"
cache: true
labels:
Original file line number Diff line number Diff line change
@@ -71,7 +71,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true),
NUM_MINION_SUBTASKS_UNKNOWN("NumMinionSubtasksUnknown", true),
NUM_MINION_SUBTASKS_DROPPED("NumMinionSubtasksDropped", true),
NUM_MINION_SUBTASKS_TIMEDOUT("NumMinionSubtasksTimedOut", true),
NUM_MINION_SUBTASKS_TIMED_OUT("NumMinionSubtasksTimedOut", true),
NUM_MINION_SUBTASKS_ABORTED("NumMinionSubtasksAborted", true),
PERCENT_MINION_SUBTASKS_IN_QUEUE("PercentMinionSubtasksInQueue", true),
PERCENT_MINION_SUBTASKS_IN_ERROR("PercentMinionSubtasksInError", true),
Original file line number Diff line number Diff line change
@@ -41,6 +41,9 @@ public abstract class ControllerPrometheusMetricsTest extends PinotPrometheusMet
List.of(ControllerGauge.NUM_MINION_TASKS_IN_PROGRESS, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING,
ControllerGauge.NUM_MINION_SUBTASKS_WAITING, ControllerGauge.NUM_MINION_SUBTASKS_ERROR,
ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN,
ControllerGauge.NUM_MINION_SUBTASKS_DROPPED,
ControllerGauge.NUM_MINION_SUBTASKS_TIMED_OUT,
ControllerGauge.NUM_MINION_SUBTASKS_ABORTED,
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR);

// Local gauges that accept a partition
Original file line number Diff line number Diff line change
@@ -118,7 +118,7 @@ protected final void runTask(Properties periodicTaskProperties) {
taskTypeAccumulatedCount.getUnknown());
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_DROPPED, taskType,
taskTypeAccumulatedCount.getDropped());
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_TIMEDOUT, taskType,
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_TIMED_OUT, taskType,
taskTypeAccumulatedCount.getTimedOut());
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_ABORTED, taskType,
taskTypeAccumulatedCount.getAborted());
@@ -142,7 +142,7 @@ protected final void runTask(Properties periodicTaskProperties) {
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
ControllerGauge.NUM_MINION_SUBTASKS_DROPPED, taskCount.getDropped());
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
ControllerGauge.NUM_MINION_SUBTASKS_TIMEDOUT, taskCount.getTimedOut());
ControllerGauge.NUM_MINION_SUBTASKS_TIMED_OUT, taskCount.getTimedOut());
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
ControllerGauge.NUM_MINION_SUBTASKS_ABORTED, taskCount.getAborted());
int tableTotal = taskCount.getTotal();
@@ -181,7 +181,7 @@ protected final void runTask(Properties periodicTaskProperties) {
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_DROPPED);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_TIMEDOUT);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_TIMED_OUT);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_ABORTED);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR);
@@ -214,7 +214,7 @@ private void removeTableTaskTypeMetrics(Set<String> tableNameWithTypeSet, String
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_DROPPED);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_TIMEDOUT);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_TIMED_OUT);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_ABORTED);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType,
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE);
Original file line number Diff line number Diff line change
@@ -84,7 +84,7 @@ public void taskType1ButNoInProgressTask() {
Mockito.when(_pinotHelixTaskResourceManager.getTasksInProgress(taskType)).thenReturn(ImmutableSet.of());
_taskMetricsEmitter.runTask(null);

Assert.assertEquals(metricsRegistry.allMetrics().size(), 8);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 11);
Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
@@ -99,6 +99,18 @@ public void taskType1ButNoInProgressTask() {
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksError.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksUnknown.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksDropped.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksTimedOut.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksAborted.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.percentMinionSubtasksInQueue.taskType1"))
.getMetric()).value(), 0L);
@@ -144,7 +156,7 @@ public void taskType1WithTwoTablesEmitMetricTwice() {
private void runAndAssertForTaskType1WithTwoTables() {
PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 20);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 29);

Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
@@ -160,6 +172,9 @@ private void runAndAssertForTaskType1WithTwoTables() {
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksError.taskType1"))
.getMetric()).value(), 1L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.numMinionSubtasksDropped.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.percentMinionSubtasksInQueue.taskType1"))
.getMetric()).value(), 50L);
@@ -179,6 +194,10 @@ private void runAndAssertForTaskType1WithTwoTables() {
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.numMinionSubtasksError.table1_OFFLINE.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.numMinionSubtasksDropped.table1_OFFLINE.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.percentMinionSubtasksInQueue.table1_OFFLINE.taskType1"))
@@ -200,6 +219,10 @@ private void runAndAssertForTaskType1WithTwoTables() {
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.numMinionSubtasksError.table2_OFFLINE.taskType1"))
.getMetric()).value(), 1L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.numMinionSubtasksDropped.table2_OFFLINE.taskType1"))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
"pinot.controller.percentMinionSubtasksInQueue.table2_OFFLINE.taskType1"))
@@ -231,7 +254,7 @@ private void oneTaskTypeWithOneTable(String taskType, String taskName1, String t

PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 14);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 20);

Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
@@ -251,6 +274,10 @@ private void oneTaskTypeWithOneTable(String taskType, String taskName1, String t
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.numMinionSubtasksError.%s", taskType)))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.numMinionSubtasksDropped.%s", taskType)))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.percentMinionSubtasksInQueue.%s", taskType)))
@@ -272,6 +299,10 @@ private void oneTaskTypeWithOneTable(String taskType, String taskName1, String t
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.numMinionSubtasksError.%s.%s", tableName, taskType)))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.numMinionSubtasksDropped.%s.%s", tableName, taskType)))
.getMetric()).value(), 0L);
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
new YammerMetricName(ControllerMetrics.class,
String.format("pinot.controller.percentMinionSubtasksInQueue.%s.%s", tableName, taskType)))