Skip to content

Commit

Permalink
[lrs] Use rq_total to decide whether to upload a locality_stats to LR…
Browse files Browse the repository at this point in the history
…S server. (envoyproxy#38094)

Commit Message: Use rq_total to decide whether to upload a
locality_stats to LRS server.
Additional Description: see issue envoyproxy#37544 for more details. rq_total is a
more reliable metric for reporting load.

Risk Level: LOW, LRS server receives more honest load reports, but its
has the choice to decide how to interpret rq_issued vs
(rq_error+rq_active+rq_success).
Testing: existing unit tests
Docs Changes:
Release Notes:
Platform Specific Features:
Runtime guard: envoy.reloadable_features.report_load_with_rq_issued
Fixes envoyproxy#37544

---------

Signed-off-by: Xin Zhuang <[email protected]>
  • Loading branch information
stevenzzzz authored Jan 22, 2025
1 parent 9575fad commit 6463b12
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 2 deletions.
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,9 @@ new_features:
- area: redis
change: |
Added support for multi-key commands on transactions.
- area: xds
change: |
Reporting a locality_stats to LRS server when rq_issued > 0, disable by setting runtime guard
``envoy.reloadable_features.report_load_with_rq_issued`` to ``false``.
deprecated:
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ RUNTIME_GUARD(envoy_reloadable_features_quic_send_server_preferred_address_to_al
RUNTIME_GUARD(envoy_reloadable_features_quic_support_certificate_compression);
RUNTIME_GUARD(envoy_reloadable_features_quic_upstream_reads_fixed_number_packets);
RUNTIME_GUARD(envoy_reloadable_features_quic_upstream_socket_use_address_cache_for_read);
RUNTIME_GUARD(envoy_reloadable_features_report_load_with_rq_issued);
RUNTIME_GUARD(envoy_reloadable_features_report_stream_reset_error_code);
RUNTIME_GUARD(envoy_reloadable_features_sanitize_sni_in_access_log);
RUNTIME_GUARD(envoy_reloadable_features_shadow_policy_inherit_trace_sampling);
Expand Down
7 changes: 6 additions & 1 deletion source/common/upstream/load_stats_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,12 @@ void LoadStatsReporter::sendLoadStatsRequest() {
}
}
}
if (rq_success + rq_error + rq_active != 0) {
bool should_send_locality_stats = rq_success + rq_error + rq_active != 0;
if (Runtime::runtimeFeatureEnabled(
"envoy.reloadable_features.report_load_with_rq_issued")) {
should_send_locality_stats = rq_issued != 0;
}
if (should_send_locality_stats) {
auto* locality_stats = cluster_stats->add_upstream_locality_stats();
locality_stats->mutable_locality()->MergeFrom(hosts[0]->locality());
locality_stats->set_priority(host_set->priority());
Expand Down
1 change: 1 addition & 0 deletions test/common/upstream/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ envoy_cc_test(
"//test/mocks/upstream:cluster_manager_mocks",
"//test/mocks/upstream:cluster_priority_set_mocks",
"//test/test_common:simulated_time_system_lib",
"//test/test_common:test_runtime_lib",
"//test/test_common:utility_lib",
"@envoy_api//envoy/config/endpoint/v3:pkg_cc_proto",
"@envoy_api//envoy/service/load_stats/v3:pkg_cc_proto",
Expand Down
33 changes: 32 additions & 1 deletion test/common/upstream/load_stats_reporter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "test/mocks/upstream/cluster_manager.h"
#include "test/mocks/upstream/cluster_priority_set.h"
#include "test/test_common/simulated_time_system.h"
#include "test/test_common/test_runtime.h"
#include "test/test_common/utility.h"

#include "gmock/gmock.h"
Expand Down Expand Up @@ -252,6 +253,9 @@ HostSharedPtr makeTestHost(const std::string& hostname,
}

void addStats(const HostSharedPtr& host, double a, double b = 0, double c = 0, double d = 0) {
if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.report_load_with_rq_issued")) {
host->stats().rq_total_.inc();
}
host->stats().rq_success_.inc();
host->loadMetricStats().add("metric_a", a);
if (b != 0) {
Expand All @@ -274,8 +278,22 @@ void addStatExpectation(envoy::config::endpoint::v3::UpstreamLocalityStats* stat
metric->set_total_metric_value(total_metric_value);
}

class LoadStatsReporterTestWithRqTotal : public LoadStatsReporterTest,
public testing::WithParamInterface<bool> {
public:
LoadStatsReporterTestWithRqTotal() {
scoped_runtime_.mergeValues(
{{"envoy.reloadable_features.report_load_with_rq_issued", GetParam() ? "true" : "false"}});
}
TestScopedRuntime scoped_runtime_;
};

INSTANTIATE_TEST_SUITE_P(LoadStatsReporterTestWithRqTotal, LoadStatsReporterTestWithRqTotal,
::testing::Bool());

// Validate that per-locality metrics are aggregated across hosts and included in the load report.
TEST_F(LoadStatsReporterTest, UpstreamLocalityStats) {
TEST_P(LoadStatsReporterTestWithRqTotal, UpstreamLocalityStats) {
bool expects_rq_total = GetParam();
EXPECT_CALL(*async_client_, startRaw(_, _, _, _)).WillOnce(Return(&async_stream_));
expectSendMessage({});
createLoadStatsReporter();
Expand Down Expand Up @@ -313,13 +331,19 @@ TEST_F(LoadStatsReporterTest, UpstreamLocalityStats) {
auto expected_locality0_stats = expected_cluster_stats.add_upstream_locality_stats();
expected_locality0_stats->mutable_locality()->set_region("mars");
expected_locality0_stats->set_total_successful_requests(3);
if (expects_rq_total) {
expected_locality0_stats->set_total_issued_requests(3);
}
addStatExpectation(expected_locality0_stats, "metric_a", 3, 0.88888);
addStatExpectation(expected_locality0_stats, "metric_b", 2, 1.12345);
addStatExpectation(expected_locality0_stats, "metric_c", 1, 3.14159);

auto expected_locality1_stats = expected_cluster_stats.add_upstream_locality_stats();
expected_locality1_stats->mutable_locality()->set_region("jupiter");
expected_locality1_stats->set_total_successful_requests(1);
if (expects_rq_total) {
expected_locality1_stats->set_total_issued_requests(1);
}
addStatExpectation(expected_locality1_stats, "metric_a", 1, 10.01);
addStatExpectation(expected_locality1_stats, "metric_c", 1, 20.02);
addStatExpectation(expected_locality1_stats, "metric_d", 1, 30.03);
Expand All @@ -331,6 +355,10 @@ TEST_F(LoadStatsReporterTest, UpstreamLocalityStats) {

// Traffic between previous request and next response. Previous latched metrics are cleared.
host1->stats().rq_success_.inc();

if (expects_rq_total) {
host1->stats().rq_total_.inc();
}
host1->loadMetricStats().add("metric_a", 1.41421);
host1->loadMetricStats().add("metric_e", 2.71828);

Expand All @@ -348,6 +376,9 @@ TEST_F(LoadStatsReporterTest, UpstreamLocalityStats) {
auto expected_locality0_stats = expected_cluster_stats.add_upstream_locality_stats();
expected_locality0_stats->mutable_locality()->set_region("mars");
expected_locality0_stats->set_total_successful_requests(1);
if (expects_rq_total) {
expected_locality0_stats->set_total_issued_requests(1);
}
addStatExpectation(expected_locality0_stats, "metric_a", 1, 1.41421);
addStatExpectation(expected_locality0_stats, "metric_e", 1, 2.71828);

Expand Down

0 comments on commit 6463b12

Please sign in to comment.