Commit

Merge pull request #74 from gonzalezzfelipe/fix/autoscaler-logging-and-max-batch

scarmuega authored Nov 25, 2024
2 parents 9fcdc30 + c1b683c commit 89acc3f
Showing 7 changed files with 48 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/deploy.yml
@@ -137,14 +137,17 @@ jobs:
region_prefix: a
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2
- region: eu-central-1
region_prefix: b
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2
- region: us-west-2
region_prefix: c
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2

env:
# Secrets
@@ -169,6 +172,7 @@ jobs:
TF_VAR_autoscaler_high_watermark: ${{ matrix.autoscaler_high_watermark }}
TF_VAR_autoscaler_low_watermark: ${{ matrix.autoscaler_low_watermark }}
TF_VAR_autoscaler_region_prefix: ${{ matrix.region_prefix }}
+ TF_VAR_autoscaler_max_batch: ${{ matrix.autoscaler_max_batch }}

steps:
- name: Filter regions
5 changes: 5 additions & 0 deletions bootstrap/stage2/deployment.tf
@@ -166,6 +166,11 @@ resource "kubernetes_deployment_v1" "operator" {
value = var.autoscaler_region_prefix
}

+ env {
+ name = "AUTOSCALER_MAX_BATCH"
+ value = var.autoscaler_max_batch
+ }
+
resources {
limits = {
cpu = var.resources.limits.cpu
4 changes: 2 additions & 2 deletions bootstrap/stage2/frontend.tf
@@ -37,12 +37,12 @@ resource "kubernetes_deployment_v1" "frontend" {
}

env {
name = "VITE_API_BASE_URL"
name = "VITE_API_BASE_URL"
value = "https://staging-rewardengine.dripdropz.io/api/v1"
}

env {
name = "VITE_API_KEY"
name = "VITE_API_KEY"
value = "067d20be-8baa-49cb-b501-e004af358870"
}

4 changes: 4 additions & 0 deletions bootstrap/stage2/main.tf
@@ -144,6 +144,10 @@ variable "autoscaler_high_watermark" {
default = 5
}

variable "autoscaler_max_batch" {
type = number
}

variable "tolerations" {
type = list(object({
effect = string
4 changes: 4 additions & 0 deletions crates/operator/src/config.rs
@@ -36,6 +36,7 @@ pub struct Config {
pub autoscaler_low_watermark: usize,
pub autoscaler_high_watermark: usize,
pub autoscaler_region_prefix: String,
+ pub autoscaler_max_batch: usize,
}

impl Config {
@@ -81,6 +82,9 @@ impl Config {
.expect("Missing AUTOSCALER_LOW_WATERMARK env var."),
autoscaler_region_prefix: env::var("AUTOSCALER_REGION_PREFIX")
.expect("Missing AUTOSCALER_REGION_PREFIX env var."),
+ autoscaler_max_batch: env::var("AUTOSCALER_MAX_BATCH")
+ .map(|x| x.parse().expect("Failed to parse AUTOSCALER_MAX_BATCH"))
+ .expect("Missing AUTOSCALER_MAX_BATCH env var.")
}
}
}
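
Editor's note: the new field follows the same fail-fast pattern as the surrounding Config fields, so the operator panics at startup if AUTOSCALER_MAX_BATCH is unset or not a valid number. A minimal standalone sketch of that behavior, assuming nothing beyond the diff above (read_max_batch is a hypothetical helper for illustration, not part of the crate):

use std::env;

// Mirrors the parsing added in Config::from_env: read the env var,
// parse it as usize, and panic with a clear message if it is absent
// or malformed. `read_max_batch` is illustrative only.
fn read_max_batch() -> usize {
    env::var("AUTOSCALER_MAX_BATCH")
        .map(|x| x.parse().expect("Failed to parse AUTOSCALER_MAX_BATCH"))
        .expect("Missing AUTOSCALER_MAX_BATCH env var.")
}

fn main() {
    // With AUTOSCALER_MAX_BATCH=2 exported this prints 2; with the var
    // unset or non-numeric, the process panics, mirroring the operator.
    println!("max batch: {}", read_max_batch());
}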
27 changes: 23 additions & 4 deletions crates/operator/src/controller.rs
@@ -11,7 +11,7 @@ use kube::{
};
use rand::{distributions::Alphanumeric, Rng};
use serde_json::json;
- use std::{collections::BTreeMap, sync::Arc, time::Duration};
+ use std::{cmp::min, collections::BTreeMap, sync::Arc, time::Duration};
use thiserror::Error;
use tracing::{error, info, warn};

@@ -471,8 +471,6 @@ impl K8sContext {
}

pub async fn deploy_node(&self) -> anyhow::Result<HydraDoomNode> {
info!("Deploying new node.");

// List available snapshots.
// Try move from available to used dir.
// If successful, start new node.
@@ -485,6 +483,7 @@
"0", // 1 for online, 0 for offline
random_name().to_lowercase()
);
info!("Deploying new node: {}", name);
let new_node = HydraDoomNode {
spec: HydraDoomNodeSpec::default(),
status: None,
@@ -522,15 +521,35 @@
None => false,
})
.collect();
+ info!(
+ "Amount of nodes in waiting state: {}",
+ available_hydra_nodes.len()
+ );

if available_hydra_nodes.len() < self.config.autoscaler_low_watermark {
- let amount = available_hydra_nodes.len() - self.config.autoscaler_low_watermark;
+ info!(
+ existing = available_hydra_nodes.len(),
+ desired = self.config.autoscaler_low_watermark,
+ "Scaling out amount of hydra nodes...",
+ );
+ let amount = min(
+ self.config.autoscaler_low_watermark - available_hydra_nodes.len(),
+ self.config.autoscaler_max_batch,
+ );
+
+ info!("About to scale the amount of Hydra nodes by {}", amount);

// One after the other to avoid race conditions.
for _ in 0..amount {
self.deploy_node().await?;
}
} else if available_hydra_nodes.len() > self.config.autoscaler_high_watermark {
+ while available_hydra_nodes.len() > self.config.autoscaler_high_watermark {
+ info!(
+ current = available_hydra_nodes.len(),
+ desired = self.config.autoscaler_high_watermark,
+ "Removing a Hydra Node..."
+ );
// High watermark will never be < 1.
self.remove_node(&available_hydra_nodes.pop().unwrap())
.await?;
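
Editor's note: two things change in the scaling logic above. First, the removed line computed available_hydra_nodes.len() - self.config.autoscaler_low_watermark inside the branch where len() is strictly below the low watermark; for usize that subtraction underflows (a panic in debug builds, wraparound in release). The fix reverses the operands and caps the result at autoscaler_max_batch, so each reconcile pass deploys at most max_batch nodes. Second, the scale-down path now logs each removal and keeps removing until the count reaches the high watermark. A minimal sketch of the new scale-out arithmetic, using hypothetical watermark values (the deploy matrix in this PR sets high = 5, low = 1, max_batch = 2 per region):

use std::cmp::min;

// Standalone illustration of the clamped scale-out amount introduced here.
fn scale_out_amount(available: usize, low_watermark: usize, max_batch: usize) -> usize {
    // Only called when available < low_watermark, so the subtraction
    // cannot underflow; min() caps the batch per reconcile pass.
    min(low_watermark - available, max_batch)
}

fn main() {
    // 1 node waiting, low watermark 5, batch cap 2: deploy 2, not 4.
    assert_eq!(scale_out_amount(1, 5, 2), 2);
    // 4 nodes waiting, low watermark 5: only the 1 missing node.
    assert_eq!(scale_out_amount(4, 5, 2), 1);
    println!("ok");
}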
6 changes: 6 additions & 0 deletions playbook/doom-dev/main.tf
@@ -115,6 +115,11 @@ variable "autoscaler_region_prefix" {
type = string
}

variable "autoscaler_max_batch" {
type = number
default = 2
}

provider "kubernetes" {
config_path = "~/.kube/config"
config_context = var.eks_cluster_arn
@@ -159,4 +164,5 @@ module "stage2" {
autoscaler_high_watermark = var.autoscaler_high_watermark
autoscaler_low_watermark = var.autoscaler_low_watermark
autoscaler_region_prefix = var.autoscaler_region_prefix
+ autoscaler_max_batch = var.autoscaler_max_batch
}
