Ksohi/concurrency limiting guide (#3080)

### Description of change

##### Checklist

- [ ] Tested in playground or other setup
- [ ] Screenshot (Grafana) from playground added to PR for 15+ minute run
- [x] Documentation is changed or added
- [ ] Tests and/or benchmarks are included
- [ ] Breaking changes

## Summary by CodeRabbit

- **Documentation**
  - Added a new guide for per-user concurrency limiting, including an overview, implementation instructions, and monitoring advice.
  - Introduced a validation script to assist users in setting up concurrency limiting policies.
- **New Features**
  - Enhanced the Aperture SDK example to include user input for client initialization and improved flow control logic in the concurrency limiting feature.
fluxninja · Dec 28, 2023 · 05caa9a · 05caa9a
1 parent 0690000
commit 05caa9a
Show file tree

Hide file tree

Showing 31 changed files with 958 additions and 1,718 deletions.
diff --git a/concurrency-limiting-test.yaml b/concurrency-limiting-test.yaml
@@ -0,0 +1,18 @@
+blueprint: "concurrency-limiting/base"
+uri: "github.com/fluxninja/aperture/blueprints@latest"
+policy:
+  policy_name: "concurrency-limit-test"
+  components: []
+  resources:
+    flow_control:
+      classifiers: []
+  concurrency_limiter:
+    max_concurrency: 10
+    selectors:
+      - control_point: "concurrency-limiting-feature"
+    parameters:
+      limit_by_label_key: "user_id" # concurrency is limited per value of this flow label
+      max_inflight_duration: "60s"
+    request_parameters: {}
+    alerter:
+      alert_name: "Too many inflight requests"
diff --git a/docs/content/code-snippets.json b/docs/content/code-snippets.json
@@ -29,7 +29,8 @@
   },
   "ts": {
     "CLFlowShouldRun": "if (flow.shouldRun()) {\n    console.log(\"Request accepted. Processing...\" + flow.checkResponse());\n  } else {\n    console.log(\"Request rejected due to concurrency limit. Try again later.\");\n  }\n\n  flow.end();",
-    "CLStartFlow": "const flow = await apertureClient.startFlow(\"concurrency-limiting-feature\", {\n    labels: {\n      user_id: \"some_user_id\",\n    },\n    grpcCallOptions: {\n      deadline: Date.now() + 300,\n    },\n  });",
+    "CLStartFlow": "const flow = await apertureClient.startFlow(\"concurrency-limiting-feature\", {\n    labels: {\n      user_id: \"some_user_id\",\n    },\n    grpcCallOptions: {\n      deadline: Date.now() + 1000,\n    },\n  });",
+    "CSStartFlow": "const flow = await apertureClient.startFlow(\n    \"concurrency-scheduling-feature\",\n    {\n      labels: {\n        user_id: \"some_user_id\",\n        priority: priority.toString(),\n        workload: tier,\n      },\n      grpcCallOptions: {\n        deadline: Date.now() + 120000, // ms\n      },\n    },\n  );\n\n  if (flow.shouldRun()) {\n    console.log(`[${tier} Tier] Request accepted with priority ${priority}.`);\n    // sleep for 5 seconds to simulate a long-running request\n    await new Promise((resolve) =\u003e setTimeout(resolve, 5000));\n  } else {\n    console.log(`[${tier} Tier] Request rejected. Priority was ${priority}.`);\n  }\n\n  flow.end();",
     "CStartFlow": "flow = await apertureClient.startFlow(\"caching-example\", {\n            labels: {\n                user_id: \"some_user_id\",\n            },\n            grpcCallOptions: {\n                deadline: Date.now() + 5000, // ms\n            },\n            resultCacheKey: \"cache\",\n        });",
     "CacheLookup": "if (flow.resultCache().getLookupStatus() === LookupStatus.Hit) {\n            console.log(flow.resultCache().getValue()?.toString());\n        } else {\n            console.log(\"Cache miss, setting cache value\");\n\n            const resString = \"Hello, world!\";\n            const buffer = Buffer.from(resString);\n            const setResp = await flow.setResultCache({\n                value: buffer,\n                ttl: {\n                    seconds: 10,\n                    nanos: 0,\n                },\n            });\n        }\n\n        flow.end();",
     "Priority": "const userTiers = {\n    \"platinum\": 8,\n    \"gold\": 4,\n    \"silver\": 2,\n    \"free\": 1,\n};",

diff --git a/docs/content/guides/api-quota-management.md b/docs/content/guides/api-quota-management.md
@@ -277,3 +277,5 @@ latency.
 
 These panels display insights into queue duration for `workload` requests and
 highlight the average of prioritized requests that moved ahead in the queue.
+Preemption for each token is measured as the average number of tokens by which
+a request belonging to a specific workload gets preempted in the queue.
diff --git a/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd b/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd
@@ -0,0 +1,21 @@
+flowchart LR
+classDef Orange fill:#F8773D,stroke:#000000,stroke-width:2px;
+classDef Green fill:#56AE89,stroke:#000000,stroke-width:2px;
+classDef Red fill:#F13C15,stroke:#000000,stroke-width:1px;
+classDef Pink fill:#ffb6c1,stroke:#000000,stroke-width:1px;
+
+TC[\Token Counter/]
+class TC Orange
+
+Scheduler
+class Scheduler Orange
+
+SDK
+class SDK Green
+
+subgraph Aperture_Cloud ["Aperture Cloud"]
+    Scheduler -- "Counting" --> TC
+end
+class Aperture_Cloud Green
+
+SDK -- "Schedule Request" --> Scheduler
diff --git a/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd.md5sum b/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd.md5sum
@@ -0,0 +1 @@
+5df63f64375615e3ec9761cb92b987af
diff --git a/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd.svg b/docs/content/guides/assets/concurrency-scheduling/concurrency-scheduling.mmd.svg
diff --git a/docs/content/guides/assets/concurrency-scheduling/graph.mmd b/docs/content/guides/assets/concurrency-scheduling/graph.mmd
@@ -0,0 +1,34 @@
+flowchart LR
+subgraph root.0[<center>ConcurrencyScheduler<br/>1 selectors</center>]
+subgraph root.0_inports[ ]
+style root.0_inports fill:none,stroke:none
+root.0max_capacity[max_capacity]
+end
+subgraph root.0_outports[ ]
+style root.0_outports fill:none,stroke:none
+root.0accept_percentage[accept_percentage]
+end
+end
+root.0_max_capacity_FakeConstantout((20.00))
+subgraph root.1[<center>Decider<br/>gte for 0s</center>]
+subgraph root.1_inports[ ]
+style root.1_inports fill:none,stroke:none
+root.1lhs[lhs]
+root.1rhs[rhs]
+end
+subgraph root.1_outports[ ]
+style root.1_outports fill:none,stroke:none
+root.1output[output]
+end
+end
+root.1_rhs_FakeConstantout((90.00))
+subgraph root.2[<center>Alerter<br/>Too many inflight requests/...</center>]
+subgraph root.2_inports[ ]
+style root.2_inports fill:none,stroke:none
+root.2signal[signal]
+end
+end
+root.0accept_percentage --> |ACCEPT_PERCENTAGE| root.1lhs
+root.0_max_capacity_FakeConstantout --> root.0max_capacity
+root.1output --> |ACCEPT_PERCENTAGE_ALERT| root.2signal
+root.1_rhs_FakeConstantout --> root.1rhs
diff --git a/docs/content/guides/assets/concurrency-scheduling/graph.mmd.md5sum b/docs/content/guides/assets/concurrency-scheduling/graph.mmd.md5sum
@@ -0,0 +1 @@
+46a81e4785ea5594fabc03bcaeefec54
diff --git a/docs/content/guides/assets/concurrency-scheduling/graph.mmd.svg b/docs/content/guides/assets/concurrency-scheduling/graph.mmd.svg
diff --git a/docs/content/guides/assets/concurrency-scheduling/policy.yaml b/docs/content/guides/assets/concurrency-scheduling/policy.yaml
@@ -0,0 +1,48 @@
+apiVersion: fluxninja.com/v1alpha1
+kind: Policy
+metadata:
+  labels:
+    fluxninja.com/validate: "true"
+  name: concurrency-scheduling-test
+spec:
+  circuit:
+    components:
+    - flow_control:
+        concurrency_scheduler:
+          concurrency_limiter:
+            limit_by_label_key: user_id
+            max_inflight_duration: 60s
+          in_ports:
+            max_concurrency:
+              constant_signal:
+                value: 20
+          out_ports:
+            accept_percentage:
+              signal_name: ACCEPT_PERCENTAGE
+          scheduler:
+            priority_label_key: priority
+            tokens_label_key: tokens
+            workload_label_key: workload
+          selectors:
+          - control_point: concurrency-scheduling-feature
+    - decider:
+        in_ports:
+          lhs:
+            signal_name: ACCEPT_PERCENTAGE
+          rhs:
+            constant_signal:
+              value: 90
+        operator: gte
+        out_ports:
+          output:
+            signal_name: ACCEPT_PERCENTAGE_ALERT
+    - alerter:
+        in_ports:
+          signal:
+            signal_name: ACCEPT_PERCENTAGE_ALERT
+        parameters:
+          alert_name: Too many inflight requests
+    evaluation_interval: 1s
+  resources:
+    flow_control:
+      classifiers: []
diff --git a/docs/content/guides/assets/concurrency-scheduling/queue.png b/docs/content/guides/assets/concurrency-scheduling/queue.png
diff --git a/docs/content/guides/assets/concurrency-scheduling/request-metrics.png b/docs/content/guides/assets/concurrency-scheduling/request-metrics.png
diff --git a/docs/content/guides/assets/concurrency-scheduling/validate.sh b/docs/content/guides/assets/concurrency-scheduling/validate.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Regenerates policy.yaml and graph.mmd from values.yaml via the shared validate helpers.
+
+set -e
+
+git_root="$(git rev-parse --show-toplevel)"
+
+# Load the shared helpers; generate_from_values is presumably defined there.
+# shellcheck disable=SC1091
+. "${git_root}/docs/tools/aperturectl/validate_common.sh"
+
+generate_from_values values.yaml tmp
+
+# Publish the generated policy and graph into this (assets) directory for use in the docs.
+cp tmp/policies/concurrency-scheduling-test-cr.yaml policy.yaml
+cp tmp/graphs/concurrency-scheduling-test-cr.mmd graph.mmd
+
+# Stage the refreshed policy and graph in git.
+"${git_root}/scripts/git_add_safely.sh" policy.yaml graph.mmd
+
+# Discard the scratch output directory.
+rm -rf tmp
diff --git a/docs/content/guides/assets/concurrency-scheduling/values.yaml b/docs/content/guides/assets/concurrency-scheduling/values.yaml
@@ -0,0 +1,22 @@
+# yaml-language-server: $schema=../../../../../blueprints/concurrency-scheduling/base/gen/definitions.json
+blueprint: concurrency-scheduling/base
+uri: ../../../../../blueprints # relative path rather than a remote blueprints URI
+policy:
+  policy_name: concurrency-scheduling-test
+  components: []
+  resources:
+    flow_control:
+      classifiers: []
+  concurrency_scheduler:
+    max_concurrency: 20
+    selectors:
+      - control_point: concurrency-scheduling-feature
+    concurrency_limiter:
+      limit_by_label_key: user_id
+      max_inflight_duration: 60s
+    scheduler:
+      priority_label_key: priority
+      tokens_label_key: tokens
+      workload_label_key: workload
+    alerter:
+      alert_name: Too many inflight requests
diff --git a/docs/content/guides/assets/concurrency-scheduling/workloads.png b/docs/content/guides/assets/concurrency-scheduling/workloads.png
diff --git a/...tent/guides/assets/per-user-concurrency-limiting/concurrency-limiter-graph1.png b/...tent/guides/assets/per-user-concurrency-limiting/concurrency-limiter-graph1.png
diff --git a/...tent/guides/assets/per-user-concurrency-limiting/concurrency-limiter-graph2.png b/...tent/guides/assets/per-user-concurrency-limiting/concurrency-limiter-graph2.png
diff --git a/docs/content/guides/assets/per-user-concurrency-limiting/concurrency-limiting.mmd b/docs/content/guides/assets/per-user-concurrency-limiting/concurrency-limiting.mmd
@@ -0,0 +1,15 @@
+flowchart RL
+classDef TokenCounter fill:#F8773D,stroke:#000000,stroke-width:2px;
+classDef Service fill:#56AE89,stroke:#000000,stroke-width:2px;
+
+subgraph Aperture_Cloud ["Aperture Cloud"]
+TB[\Token Counter/]
+class TB TokenCounter
+end
+class Aperture_Cloud Service
+
+TB <-- "counting tokens" --> SDK
+
+  subgraph "SDK"
+  end
+  class SDK Service
diff --git a/docs/content/guides/assets/per-user-concurrency-limiting/concurrency-limiting.mmd.md5sum b/docs/content/guides/assets/per-user-concurrency-limiting/concurrency-limiting.mmd.md5sum
@@ -0,0 +1 @@
+b15a13197b33b05703fc7e0551d693a6