Commit

Merge pull request #74 from gonzalezzfelipe/fix/autoscaler-logging-and-max-batch

scarmuega authored Nov 25, 2024
2 parents 9fcdc30 + c1b683c commit 89acc3f
Showing 7 changed files with 48 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/deploy.yml
@@ -137,14 +137,17 @@ jobs:
region_prefix: a
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2
- region: eu-central-1
region_prefix: b
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2
- region: us-west-2
region_prefix: c
autoscaler_high_watermark: 5
autoscaler_low_watermark: 1
+ autoscaler_max_batch: 2

env:
# Secrets
@@ -169,6 +172,7 @@ jobs:
TF_VAR_autoscaler_high_watermark: ${{ matrix.autoscaler_high_watermark }}
TF_VAR_autoscaler_low_watermark: ${{ matrix.autoscaler_low_watermark }}
TF_VAR_autoscaler_region_prefix: ${{ matrix.region_prefix }}
+ TF_VAR_autoscaler_max_batch: ${{ matrix.autoscaler_max_batch }}

steps:
- name: Filter regions
5 changes: 5 additions & 0 deletions bootstrap/stage2/deployment.tf
@@ -166,6 +166,11 @@ resource "kubernetes_deployment_v1" "operator" {
value = var.autoscaler_region_prefix
}

+ env {
+ name = "AUTOSCALER_MAX_BATCH"
+ value = var.autoscaler_max_batch
+ }
+
resources {
limits = {
cpu = var.resources.limits.cpu
4 changes: 2 additions & 2 deletions bootstrap/stage2/frontend.tf
@@ -37,12 +37,12 @@ resource "kubernetes_deployment_v1" "frontend" {
}

env {
name = "VITE_API_BASE_URL"
name = "VITE_API_BASE_URL"
value = "https://staging-rewardengine.dripdropz.io/api/v1"
}

env {
name = "VITE_API_KEY"
name = "VITE_API_KEY"
value = "067d20be-8baa-49cb-b501-e004af358870"
}

4 changes: 4 additions & 0 deletions bootstrap/stage2/main.tf
@@ -144,6 +144,10 @@ variable "autoscaler_high_watermark" {
default = 5
}

variable "autoscaler_max_batch" {
type = number
}

variable "tolerations" {
type = list(object({
effect = string
4 changes: 4 additions & 0 deletions crates/operator/src/config.rs
@@ -36,6 +36,7 @@ pub struct Config {
pub autoscaler_low_watermark: usize,
pub autoscaler_high_watermark: usize,
pub autoscaler_region_prefix: String,
+ pub autoscaler_max_batch: usize,
}

impl Config {
@@ -81,6 +82,9 @@ impl Config {
.expect("Missing AUTOSCALER_LOW_WATERMARK env var."),
autoscaler_region_prefix: env::var("AUTOSCALER_REGION_PREFIX")
.expect("Missing AUTOSCALER_REGION_PREFIX env var."),
+ autoscaler_max_batch: env::var("AUTOSCALER_MAX_BATCH")
+ .map(|x| x.parse().expect("Failed to parse AUTOSCALER_MAX_BATCH"))
+ .expect("Missing AUTOSCALER_MAX_BATCH env var.")
}
}
}
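
Editor's note: the new field follows the same fail-fast pattern as the surrounding Config fields, so the operator panics at startup if AUTOSCALER_MAX_BATCH is unset or not a valid number. A minimal standalone sketch of that behavior, assuming nothing beyond the diff above (read_max_batch is a hypothetical helper for illustration, not part of the crate):

use std::env;

// Mirrors the parsing added in Config::from_env: read the env var,
// parse it as usize, and panic with a clear message if it is absent
// or malformed. `read_max_batch` is illustrative only.
fn read_max_batch() -> usize {
    env::var("AUTOSCALER_MAX_BATCH")
        .map(|x| x.parse().expect("Failed to parse AUTOSCALER_MAX_BATCH"))
        .expect("Missing AUTOSCALER_MAX_BATCH env var.")
}

fn main() {
    // With AUTOSCALER_MAX_BATCH=2 exported this prints 2; with the var
    // unset or non-numeric, the process panics, mirroring the operator.
    println!("max batch: {}", read_max_batch());
}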
27 changes: 23 additions & 4 deletions crates/operator/src/controller.rs
@@ -11,7 +11,7 @@ use kube::{
};
use rand::{distributions::Alphanumeric, Rng};
use serde_json::json;
- use std::{collections::BTreeMap, sync::Arc, time::Duration};
+ use std::{cmp::min, collections::BTreeMap, sync::Arc, time::Duration};
use thiserror::Error;
use tracing::{error, info, warn};

@@ -471,8 +471,6 @@ impl K8sContext {
}

pub async fn deploy_node(&self) -> anyhow::Result<HydraDoomNode> {
info!("Deploying new node.");

// List available snapshots.
// Try move from available to used dir.
// If successful, start new node.
@@ -485,6 +483,7 @@
"0", // 1 for online, 0 for offline
random_name().to_lowercase()
);
info!("Deploying new node: {}", name);
let new_node = HydraDoomNode {
spec: HydraDoomNodeSpec::default(),
status: None,
@@ -522,15 +521,35 @@
None => false,
})
.collect();
+ info!(
+ "Amount of nodes in waiting state: {}",
+ available_hydra_nodes.len()
+ );

if available_hydra_nodes.len() < self.config.autoscaler_low_watermark {
- let amount = available_hydra_nodes.len() - self.config.autoscaler_low_watermark;
+ info!(
+ existing = available_hydra_nodes.len(),
+ desired = self.config.autoscaler_low_watermark,
+ "Scaling out amount of hydra nodes...",
+ );
+ let amount = min(
+ self.config.autoscaler_low_watermark - available_hydra_nodes.len(),
+ self.config.autoscaler_max_batch,
+ );
+
+ info!("About to scale the amount of Hydra nodes by {}", amount);

// One after the other to avoid race conditions.
for _ in 0..amount {
self.deploy_node().await?;
}
} else if available_hydra_nodes.len() > self.config.autoscaler_high_watermark {
+ while available_hydra_nodes.len() > self.config.autoscaler_high_watermark {
+ info!(
+ current = available_hydra_nodes.len(),
+ desired = self.config.autoscaler_high_watermark,
+ "Removing a Hydra Node..."
+ );
// High watermark will never be < 1.
self.remove_node(&available_hydra_nodes.pop().unwrap())
.await?;
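
Editor's note: two things change in the scaling logic above. First, the removed line computed available_hydra_nodes.len() - self.config.autoscaler_low_watermark inside the branch where len() is strictly below the low watermark; for usize that subtraction underflows (a panic in debug builds, wraparound in release). The fix reverses the operands and caps the result at autoscaler_max_batch, so each reconcile pass deploys at most max_batch nodes. Second, the scale-down path now logs each removal and keeps removing until the count reaches the high watermark. A minimal sketch of the new scale-out arithmetic, using hypothetical watermark values (the deploy matrix in this PR sets high = 5, low = 1, max_batch = 2 per region):

use std::cmp::min;

// Standalone illustration of the clamped scale-out amount introduced here.
fn scale_out_amount(available: usize, low_watermark: usize, max_batch: usize) -> usize {
    // Only called when available < low_watermark, so the subtraction
    // cannot underflow; min() caps the batch per reconcile pass.
    min(low_watermark - available, max_batch)
}

fn main() {
    // 1 node waiting, low watermark 5, batch cap 2: deploy 2, not 4.
    assert_eq!(scale_out_amount(1, 5, 2), 2);
    // 4 nodes waiting, low watermark 5: only the 1 missing node.
    assert_eq!(scale_out_amount(4, 5, 2), 1);
    println!("ok");
}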
6 changes: 6 additions & 0 deletions playbook/doom-dev/main.tf
@@ -115,6 +115,11 @@ variable "autoscaler_region_prefix" {
type = string
}

variable "autoscaler_max_batch" {
type = number
default = 2
}

provider "kubernetes" {
config_path = "~/.kube/config"
config_context = var.eks_cluster_arn
@@ -159,4 +164,5 @@ module "stage2" {
autoscaler_high_watermark = var.autoscaler_high_watermark
autoscaler_low_watermark = var.autoscaler_low_watermark
autoscaler_region_prefix = var.autoscaler_region_prefix
+ autoscaler_max_batch = var.autoscaler_max_batch
}
