Skip to content

Commit 57b5c9c

Browse files
authored
prometheus_remote_write: Add cutoff for outdated metrics (#183)
This is needed because the official Prometheus node_exporter effectively removes outdated metrics: it recreates the storage that handles/stores metric information on every cycle. We should at least cut off metrics that are one hour old or older, to prevent "metric too old" errors in the Prometheus remote write mechanism. Signed-off-by: Hiroshi Hatake <[email protected]>
1 parent 82cf0ce commit 57b5c9c

File tree

3 files changed

+98
-5
lines changed

3 files changed

+98
-5
lines changed

include/cmetrics/cmt_encode_prometheus_remote_write.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@
2525
#include <prometheus_remote_write/remote.pb-c.h>
2626

2727
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_ADD_METADATA CMT_FALSE
28+
/*
 * Maximum accepted sample age: one hour expressed as 60 * 60 * 10^9.
 * The expansion is parenthesized so it composes safely inside larger
 * expressions, and uses long long literals so the product does not
 * overflow on targets where long is only 32 bits wide.
 */
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_THRESHOLD (60LL * 60LL * 1000000000LL)
2829

2930
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS 0
3031
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_ALLOCATION_ERROR 1
3132
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_UNEXPECTED_ERROR 2
3233
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_INVALID_ARGUMENT_ERROR 3
3334
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_UNEXPECTED_METRIC_TYPE 4
35+
#define CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR 5
3436

3537
struct cmt_prometheus_metric_metadata {
3638
Prometheus__MetricMetadata data;

src/cmt_encode_prometheus_remote_write.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,18 +644,38 @@ int pack_basic_metric_sample(struct cmt_prometheus_remote_write_context *context
644644
return append_metric_to_timeseries(time_series, metric);
645645
}
646646

647+
/*
 * Report whether a metric's timestamp is older than the allowed cutoff.
 *
 * metric: metric whose timestamp is read through cmt_metric_get_timestamp()
 * now:    reference time the metric timestamp is compared against
 * cutoff: maximum accepted difference between now and the metric timestamp
 *
 * Returns non-zero when (now - timestamp) exceeds cutoff, zero otherwise.
 * A timestamp equal to or ahead of now is never reported as stale: the
 * operands are unsigned, so subtracting without this guard would wrap
 * around to a huge value and misclassify the metric.
 */
static int check_staled_timestamp(struct cmt_metric *metric, uint64_t now, uint64_t cutoff)
{
    uint64_t ts;

    ts = cmt_metric_get_timestamp(metric);

    if (ts >= now) {
        return 0;
    }

    return (now - ts) > cutoff;
}
657+
647658
int pack_basic_type(struct cmt_prometheus_remote_write_context *context,
648659
struct cmt_map *map)
649660
{
650661
int add_metadata;
651662
struct cmt_metric *metric;
652663
int result;
653664
struct cfl_list *head;
665+
uint64_t now;
654666

655667
context->sequence_number++;
656668
add_metadata = CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_ADD_METADATA;
657669

670+
now = cfl_time_now();
671+
658672
if (map->metric_static_set == CMT_TRUE) {
673+
if (check_staled_timestamp(&map->metric, now,
674+
CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_THRESHOLD)) {
675+
/* Skip processing metrics which are staled over the threshold */
676+
return CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR;
677+
}
678+
659679
result = pack_basic_metric_sample(context, map, &map->metric, add_metadata);
660680

661681
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
@@ -666,6 +686,12 @@ int pack_basic_type(struct cmt_prometheus_remote_write_context *context,
666686
cfl_list_foreach(head, &map->metrics) {
667687
metric = cfl_list_entry(head, struct cmt_metric, _head);
668688

689+
if (check_staled_timestamp(metric, now,
690+
CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_THRESHOLD)) {
691+
/* Skip processing metrics which are staled over the threshold */
692+
return CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR;
693+
}
694+
669695
result = pack_basic_metric_sample(context, map, metric, add_metadata);
670696

671697
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
@@ -699,6 +725,15 @@ int pack_complex_metric_sample(struct cmt_prometheus_remote_write_context *conte
699725
struct cmt_summary *summary;
700726
int result;
701727
size_t index;
728+
uint64_t now;
729+
730+
now = cfl_time_now();
731+
732+
if (check_staled_timestamp(metric, now,
733+
CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_THRESHOLD)) {
734+
/* Skip processing metrics which are staled over the threshold */
735+
return CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR;
736+
}
702737

703738
additional_label_caption = cfl_sds_create_len(NULL, 128);
704739

@@ -1067,6 +1102,10 @@ cfl_sds_t cmt_encode_prometheus_remote_write_create(struct cmt *cmt)
10671102
counter = cfl_list_entry(head, struct cmt_counter, _head);
10681103
result = pack_basic_type(&context, counter->map);
10691104

1105+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
1106+
continue;
1107+
}
1108+
10701109
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
10711110
break;
10721111
}
@@ -1078,6 +1117,10 @@ cfl_sds_t cmt_encode_prometheus_remote_write_create(struct cmt *cmt)
10781117
gauge = cfl_list_entry(head, struct cmt_gauge, _head);
10791118
result = pack_basic_type(&context, gauge->map);
10801119

1120+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
1121+
continue;
1122+
}
1123+
10811124
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
10821125
break;
10831126
}
@@ -1089,6 +1132,10 @@ cfl_sds_t cmt_encode_prometheus_remote_write_create(struct cmt *cmt)
10891132
cfl_list_foreach(head, &cmt->untypeds) {
10901133
untyped = cfl_list_entry(head, struct cmt_untyped, _head);
10911134
pack_basic_type(&context, untyped->map);
1135+
1136+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
1137+
continue;
1138+
}
10921139
}
10931140
}
10941141

@@ -1098,6 +1145,10 @@ cfl_sds_t cmt_encode_prometheus_remote_write_create(struct cmt *cmt)
10981145
summary = cfl_list_entry(head, struct cmt_summary, _head);
10991146
result = pack_complex_type(&context, summary->map);
11001147

1148+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
1149+
continue;
1150+
}
1151+
11011152
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
11021153
break;
11031154
}
@@ -1110,13 +1161,18 @@ cfl_sds_t cmt_encode_prometheus_remote_write_create(struct cmt *cmt)
11101161
histogram = cfl_list_entry(head, struct cmt_histogram, _head);
11111162
result = pack_complex_type(&context, histogram->map);
11121163

1164+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
1165+
continue;
1166+
}
1167+
11131168
if (result != CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
11141169
break;
11151170
}
11161171
}
11171172
}
11181173

1119-
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS) {
1174+
if (result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_SUCCESS ||
1175+
result == CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_ERROR) {
11201176
buf = render_remote_write_context_to_sds(&context);
11211177
}
11221178

tests/encoding.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,17 @@ static struct cmt *generate_simple_encoder_test_data()
6666
return cmt;
6767
}
6868

69-
static struct cmt *generate_encoder_test_data()
69+
static struct cmt *generate_encoder_test_data_with_timestamp(uint64_t ts)
7070
{
7171
double quantiles[5];
7272
struct cmt_histogram_buckets *buckets;
7373
double val;
7474
struct cmt *cmt;
75-
uint64_t ts;
7675
struct cmt_gauge *g1;
7776
struct cmt_counter *c1;
7877
struct cmt_summary *s1;
7978
struct cmt_histogram *h1;
8079

81-
ts = 0;
8280
cmt = cmt_create();
8381

8482
c1 = cmt_counter_create(cmt, "kubernetes", "network", "load_counter", "Network load counter",
@@ -158,6 +156,11 @@ static struct cmt *generate_encoder_test_data()
158156
return cmt;
159157
}
160158

159+
/*
 * Build the encoder test data set with every sample stamped at
 * timestamp 0, by delegating to the timestamp-aware generator.
 */
static struct cmt *generate_encoder_test_data()
{
    struct cmt *test_data;

    test_data = generate_encoder_test_data_with_timestamp(0);

    return test_data;
}
163+
161164
/*
162165
* perform the following data encoding and compare msgpack buffsers
163166
*
@@ -511,10 +514,13 @@ void test_prometheus_remote_write()
511514
struct cmt *cmt;
512515
cfl_sds_t payload;
513516
FILE *sample_file;
517+
uint64_t ts;
518+
519+
ts = cfl_time_now();
514520

515521
cmt_initialize();
516522

517-
cmt = generate_encoder_test_data();
523+
cmt = generate_encoder_test_data_with_timestamp(ts);
518524

519525
payload = cmt_encode_prometheus_remote_write_create(cmt);
520526
TEST_CHECK(NULL != payload);
@@ -544,6 +550,34 @@ curl -v 'http://localhost:9090/receive' -H 'Content-Type: application/x-protobuf
544550
cmt_destroy(cmt);
545551
}
546552

553+
void test_prometheus_remote_write_with_outdated_timestamps()
554+
{
555+
struct cmt *cmt;
556+
cfl_sds_t payload;
557+
uint64_t ts;
558+
559+
ts = cfl_time_now() - CMT_ENCODE_PROMETHEUS_REMOTE_WRITE_CUTOFF_THRESHOLD * 1.5;
560+
561+
cmt_initialize();
562+
563+
cmt = generate_encoder_test_data_with_timestamp(ts);
564+
565+
payload = cmt_encode_prometheus_remote_write_create(cmt);
566+
TEST_CHECK(NULL != payload);
567+
568+
if (payload == NULL) {
569+
cmt_destroy(cmt);
570+
571+
return;
572+
}
573+
574+
TEST_CHECK(0 == cfl_sds_len(payload));
575+
576+
cmt_encode_prometheus_remote_write_destroy(payload);
577+
578+
cmt_destroy(cmt);
579+
}
580+
547581
void test_opentelemetry()
548582
{
549583
cfl_sds_t payload;
@@ -1081,6 +1115,7 @@ TEST_LIST = {
10811115
{"cmt_msgpack_cleanup_on_error", test_cmt_to_msgpack_cleanup_on_error},
10821116
{"cmt_msgpack_partial_processing", test_cmt_msgpack_partial_processing},
10831117
{"prometheus_remote_write", test_prometheus_remote_write},
1118+
{"prometheus_remote_write_old_cmt",test_prometheus_remote_write_with_outdated_timestamps},
10841119
{"cmt_msgpack_stability", test_cmt_to_msgpack_stability},
10851120
{"cmt_msgpack_integrity", test_cmt_to_msgpack_integrity},
10861121
{"cmt_msgpack_labels", test_cmt_to_msgpack_labels},

0 commit comments

Comments
 (0)