Skip to content

Commit b18b993

Browse files
authored
overload_manager: make proactive resource monitors trigger actions (#42018)
Also, publish a resource pressure gauge like normal resource monitors. Signed-off-by: Greg Greenway <[email protected]>
1 parent fdeffee commit b18b993

File tree

5 files changed

+81
-6
lines changed

5 files changed

+81
-6
lines changed

changelogs/current.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ minor_behavior_changes:
3232
<envoy_v3_api_field_config.route.v3.RouteAction.path_rewrite>` to
3333
:ref:`RouteAction <envoy_v3_api_msg_config.route.v3.RouteAction>` to support substitution
3434
formatting for path header rewriting.
35+
- area: overload_manager
36+
change: |
37+
Fixed :ref:`downstream connections monitor
38+
<envoy_v3_api_msg_extensions.resource_monitors.downstream_connections.v3.DownstreamConnectionsConfig>`
39+
to correctly trigger configured actions, and create a ``pressure`` metric like other resource monitors.
40+
Previously, this extension would never trigger an action.
3541
- area: tracing
3642
change: |
3743
The :ref:`request header custom tag <envoy_v3_api_field_type.tracing.v3.CustomTag.request_header>`

envoy/server/overload/thread_local_overload_state.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ class OverloadProactiveResourceNameValues {
2828
proactive_action_name_to_resource_ = {
2929
{GlobalDownstreamMaxConnections,
3030
OverloadProactiveResourceName::GlobalDownstreamMaxConnections}};
31+
32+
const std::string& resourceToName(OverloadProactiveResourceName resource) const {
33+
switch (resource) {
34+
case OverloadProactiveResourceName::GlobalDownstreamMaxConnections:
35+
return GlobalDownstreamMaxConnections;
36+
}
37+
PANIC_DUE_TO_CORRUPT_ENUM;
38+
}
3139
};
3240

3341
using OverloadProactiveResources = ConstSingleton<OverloadProactiveResourceNameValues>;

envoy/server/proactive_resource_monitor.h

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class ProactiveResource {
5050
ProactiveResource(const std::string& name, ProactiveResourceMonitorPtr monitor,
5151
Stats::Scope& stats_scope)
5252
: name_(name), monitor_(std::move(monitor)),
53-
failed_updates_counter_(makeCounter(stats_scope, name, "failed_updates")) {}
53+
failed_updates_counter_(makeCounter(stats_scope, name, "failed_updates")),
54+
pressure_gauge_(makeGauge(stats_scope, name, "pressure")) {}
5455

5556
bool tryAllocateResource(int64_t increment) {
5657
if (monitor_->tryAllocateResource(increment)) {
@@ -70,6 +71,13 @@ class ProactiveResource {
7071
}
7172
}
7273

74+
double updateResourcePressure() {
75+
const double pressure = static_cast<double>(monitor_->currentResourceUsage()) /
76+
static_cast<double>(monitor_->maxResourceUsage());
77+
pressure_gauge_.set(pressure * 100);
78+
return pressure;
79+
}
80+
7381
ProactiveResourceMonitorOptRef getProactiveResourceMonitorForTest() {
7482
return makeOptRefFromPtr<ProactiveResourceMonitor>(monitor_.get());
7583
};
@@ -78,12 +86,22 @@ class ProactiveResource {
7886
const std::string name_;
7987
ProactiveResourceMonitorPtr monitor_;
8088
Stats::Counter& failed_updates_counter_;
89+
Stats::Gauge& pressure_gauge_;
90+
91+
Stats::StatNameManagedStorage makeName(Stats::Scope& scope, absl::string_view a,
92+
absl::string_view b) {
93+
return {absl::StrCat("overload.", a, ".", b), scope.symbolTable()};
94+
}
8195

8296
Stats::Counter& makeCounter(Stats::Scope& scope, absl::string_view a, absl::string_view b) {
83-
Stats::StatNameManagedStorage stat_name(absl::StrCat("overload.", a, ".", b),
84-
scope.symbolTable());
97+
auto stat_name = makeName(scope, a, b);
8598
return scope.counterFromStatName(stat_name.statName());
8699
}
100+
101+
Stats::Gauge& makeGauge(Stats::Scope& scope, absl::string_view a, absl::string_view b) {
102+
auto stat_name = makeName(scope, a, b);
103+
return scope.gaugeFromStatName(stat_name.statName(), Stats::Gauge::ImportMode::NeverImport);
104+
}
87105
};
88106

89107
} // namespace Server

source/server/overload_manager_impl.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,12 +581,18 @@ void OverloadManagerImpl::start() {
581581
// Start a new flush epoch. If all resource updates complete before this callback runs, the last
582582
// resource update will call flushResourceUpdates to flush the whole batch early.
583583
++flush_epoch_;
584-
flush_awaiting_updates_ = resources_.size();
584+
flush_awaiting_updates_ = resources_.size() + proactive_resources_->size();
585585

586586
for (auto& resource : resources_) {
587587
resource.second.update(flush_epoch_);
588588
}
589589

590+
for (auto& resource : *proactive_resources_) {
591+
const double pressure = resource.second.updateResourcePressure();
592+
updateResourcePressure(OverloadProactiveResources::get().resourceToName(resource.first),
593+
pressure, flush_epoch_);
594+
}
595+
590596
// Record delay.
591597
auto now = time_source_.monotonicTime();
592598
std::chrono::milliseconds delay =

test/server/overload_manager_impl_test.cc

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,9 @@ constexpr char proactiveResourceConfig[] = R"YAML(
299299
actions:
300300
- name: envoy.overload_actions.shrink_heap
301301
triggers:
302-
- name: envoy.resource_monitors.fake_resource1
302+
- name: envoy.resource_monitors.global_downstream_max_connections
303303
threshold:
304-
value: 0.9
304+
value: 0.5
305305
)YAML";
306306

307307
TEST_F(OverloadManagerImplTest, CallbackOnlyFiresWhenStateChanges) {
@@ -930,12 +930,21 @@ TEST_F(OverloadManagerImplTest, ProactiveResourceAllocateAndDeallocateResourceTe
930930
Stats::Counter& failed_updates =
931931
stats_.counter("overload.envoy.resource_monitors.global_downstream_max_connections."
932932
"failed_updates");
933+
Stats::Gauge& pressure =
934+
stats_.gauge("overload.envoy.resource_monitors.global_downstream_max_connections.pressure",
935+
Stats::Gauge::ImportMode::NeverImport);
936+
933937
manager->start();
934938
EXPECT_TRUE(manager->getThreadLocalOverloadState().isResourceMonitorEnabled(
935939
OverloadProactiveResourceName::GlobalDownstreamMaxConnections));
936940
bool resource_allocated = manager->getThreadLocalOverloadState().tryAllocateResource(
937941
Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections, 1);
938942
EXPECT_TRUE(resource_allocated);
943+
944+
EXPECT_EQ(pressure.value(), 0);
945+
timer_cb_();
946+
EXPECT_EQ(pressure.value(), 33);
947+
939948
auto monitor = manager->getThreadLocalOverloadState().getProactiveResourceMonitorForTest(
940949
Server::OverloadProactiveResourceName::GlobalDownstreamMaxConnections);
941950
EXPECT_NE(absl::nullopt, monitor);
@@ -954,6 +963,34 @@ TEST_F(OverloadManagerImplTest, ProactiveResourceAllocateAndDeallocateResourceTe
954963
manager->stop();
955964
}
956965

966+
// Test that proactive monitors trigger the configured actions when they reach the threshold.
967+
TEST_F(OverloadManagerImplTest, ProactiveResourceTriggers) {
968+
setDispatcherExpectation();
969+
970+
auto manager(createOverloadManager(proactiveResourceConfig));
971+
bool is_active = false;
972+
manager->registerForAction("envoy.overload_actions.shrink_heap", dispatcher_,
973+
[&](OverloadActionState state) { is_active = state.isSaturated(); });
974+
975+
manager->start();
976+
977+
// Trigger threshold is 50%, max is 3.
978+
979+
ASSERT_TRUE(factory5_.monitor_->tryAllocateResource(1));
980+
timer_cb_();
981+
EXPECT_FALSE(is_active);
982+
983+
ASSERT_TRUE(factory5_.monitor_->tryAllocateResource(1));
984+
timer_cb_();
985+
EXPECT_TRUE(is_active);
986+
987+
ASSERT_TRUE(factory5_.monitor_->tryDeallocateResource(1));
988+
timer_cb_();
989+
EXPECT_FALSE(is_active);
990+
991+
manager->stop();
992+
}
993+
957994
class OverloadManagerSimulatedTimeTest : public OverloadManagerImplTest,
958995
public Envoy::Event::TestUsingSimulatedTime {};
959996

0 commit comments

Comments
 (0)