Skip to content

Commit 6b9a942

Browse files
Merge remote-tracking branch 'origin/main' into pluggable-bbr
A merge commit for WiP Pluggable BBR framework
2 parents 92cdb00 + 68a5612 commit 6b9a942

File tree

30 files changed

+1239
-1309
lines changed

30 files changed

+1239
-1309
lines changed

cmd/epp/runner/runner.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import (
2020
"context"
2121
"crypto/tls"
2222
"errors"
23-
"flag"
23+
goflag "flag"
2424
"fmt"
2525
"net/http"
2626
"net/http/pprof"
@@ -34,6 +34,7 @@ import (
3434

3535
"github.com/go-logr/logr"
3636
"github.com/prometheus/client_golang/prometheus"
37+
flag "github.com/spf13/pflag"
3738
uberzap "go.uber.org/zap"
3839
"go.uber.org/zap/zapcore"
3940
"google.golang.org/grpc"
@@ -60,6 +61,7 @@ import (
6061
dlmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/metrics"
6162
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
6263
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol"
64+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol/contracts"
6365
fccontroller "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol/controller"
6466
fcregistry "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol/registry"
6567
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
@@ -199,7 +201,9 @@ func (r *Runner) Run(ctx context.Context) error {
199201
opts := zap.Options{
200202
Development: true,
201203
}
202-
opts.BindFlags(flag.CommandLine)
204+
gfs := goflag.NewFlagSet("zap", goflag.ExitOnError)
205+
opts.BindFlags(gfs) // zap expects a standard Go FlagSet and pflag.FlagSet is not compatible.
206+
flag.CommandLine.AddGoFlagSet(gfs)
203207
flag.Parse()
204208
initLogging(&opts)
205209

@@ -330,7 +334,10 @@ func (r *Runner) Run(ctx context.Context) error {
330334

331335
// --- Admission Control Initialization ---
332336
var admissionController requestcontrol.AdmissionController
337+
var locator contracts.PodLocator
338+
locator = requestcontrol.NewDatastorePodLocator(ds)
333339
if r.featureGates[flowcontrol.FeatureGate] {
340+
locator = requestcontrol.NewCachedPodLocator(ctx, locator, time.Millisecond*50)
334341
setupLog.Info("Initializing experimental Flow Control layer")
335342
fcCfg, err := flowControlConfig.ValidateAndApplyDefaults()
336343
if err != nil {
@@ -342,24 +349,28 @@ func (r *Runner) Run(ctx context.Context) error {
342349
if err != nil {
343350
return fmt.Errorf("failed to initialize Flow Registry: %w", err)
344351
}
345-
fc, err := fccontroller.NewFlowController(ctx, fcCfg.Controller, registry, saturationDetector, setupLog)
352+
fc, err := fccontroller.NewFlowController(
353+
ctx,
354+
fcCfg.Controller,
355+
registry, saturationDetector,
356+
locator,
357+
setupLog,
358+
)
346359
if err != nil {
347360
return fmt.Errorf("failed to initialize Flow Controller: %w", err)
348361
}
349362
go registry.Run(ctx)
350-
admissionController = requestcontrol.NewFlowControlAdmissionController(saturationDetector, fc)
363+
admissionController = requestcontrol.NewFlowControlAdmissionController(fc)
351364
} else {
352365
setupLog.Info("Experimental Flow Control layer is disabled, using legacy admission control")
353-
admissionController = requestcontrol.NewLegacyAdmissionController(saturationDetector)
366+
admissionController = requestcontrol.NewLegacyAdmissionController(saturationDetector, locator)
354367
}
355368

356-
locator := requestcontrol.NewDatastorePodLocator(ds)
357-
cachedLocator := requestcontrol.NewCachedPodLocator(ctx, locator, time.Millisecond*50)
358369
director := requestcontrol.NewDirectorWithConfig(
359370
ds,
360371
scheduler,
361372
admissionController,
362-
cachedLocator,
373+
locator,
363374
r.requestControlConfig)
364375

365376
// --- Setup ExtProc Server Runner ---
@@ -472,7 +483,7 @@ func (r *Runner) parseConfigurationPhaseOne(ctx context.Context) (*configapi.End
472483

473484
r.registerInTreePlugins()
474485

475-
rawConfig, featureGates, err := loader.LoadConfigPhaseOne(configBytes, logger)
486+
rawConfig, featureGates, err := loader.LoadRawConfig(configBytes, logger)
476487
if err != nil {
477488
return nil, fmt.Errorf("failed to parse config - %w", err)
478489
}
@@ -498,7 +509,7 @@ func makePodListFunc(ds datastore.Datastore) func() []types.NamespacedName {
498509
func (r *Runner) parseConfigurationPhaseTwo(ctx context.Context, rawConfig *configapi.EndpointPickerConfig, ds datastore.Datastore) (*config.Config, error) {
499510
logger := log.FromContext(ctx)
500511
handle := plugins.NewEppHandle(ctx, makePodListFunc(ds))
501-
cfg, err := loader.LoadConfigPhaseTwo(rawConfig, handle, logger)
512+
cfg, err := loader.InstantiateAndConfigure(rawConfig, handle, logger)
502513

503514
if err != nil {
504515
return nil, fmt.Errorf("failed to load the configuration - %w", err)

config/charts/inferencepool/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,22 @@ The following table lists the configurable parameters of the chart.
247247
| `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. |
248248
| `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. |
249249
| `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. |
250+
| `inferenceExtension.volumes` | List of volumes to mount in the EPP deployment as free-form YAML. Optional. |
251+
| `inferenceExtension.volumeMounts` | List of volume mounts for the EPP container as free-form YAML. Optional. |
252+
| `inferenceExtension.sidecar.enabled` | Enables or disables the sidecar container in the EPP deployment. Defaults to `false`. |
253+
| `inferenceExtension.sidecar.name` | Name of the sidecar container. Required when the sidecar is enabled. |
254+
| `inferenceExtension.sidecar.image` | Image for the sidecar container. Required when the sidecar is enabled. |
255+
| `inferenceExtension.sidecar.imagePullPolicy` | Image pull policy for the sidecar container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `IfNotPresent`. |
256+
| `inferenceExtension.sidecar.command` | Command to run in the sidecar container as a single string. Optional. |
257+
| `inferenceExtension.sidecar.args` | Arguments to pass to the command in the sidecar container as a list of strings. Optional. |
258+
| `inferenceExtension.sidecar.env` | Environment variables to set in the sidecar container as free-form YAML. Optional. |
259+
| `inferenceExtension.sidecar.ports` | List of ports to expose for the sidecar container. Optional. |
260+
| `inferenceExtension.sidecar.livenessProbe` | Liveness probe configuration for the sidecar container. Optional. |
261+
| `inferenceExtension.sidecar.readinessProbe` | Readiness probe configuration for the sidecar container. Optional. |
262+
| `inferenceExtension.sidecar.resources` | Resource limits and requests for the sidecar container. Optional. |
263+
| `inferenceExtension.sidecar.volumeMounts` | List of volume mounts for the sidecar container. Optional. |
264+
| `inferenceExtension.sidecar.volumes` | List of volumes for the sidecar container. Optional. |
265+
| `inferenceExtension.sidecar.configMapData` | Custom key-value pairs to be included in a ConfigMap created for the sidecar container. Only used when `inferenceExtension.sidecar.enabled` is `true`. Optional. |
250266
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
251267
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
252268

config/charts/inferencepool/templates/epp-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,14 @@ data:
6464
{{- end }}
6565

6666
---
67-
{{- if .Values.inferenceExtension.sidecar.enabled }}
67+
{{- if and .Values.inferenceExtension.sidecar.enabled .Values.inferenceExtension.sidecar.configMapData }}
6868
apiVersion: v1
6969
kind: ConfigMap
7070
metadata:
71-
name: {{ .Values.inferenceExtension.sidecar.configMap.name }}
71+
name: {{ include "gateway-api-inference-extension.name" . }}-sidecar
7272
namespace: {{ .Release.Namespace }}
7373
data:
74-
{{- .Values.inferenceExtension.sidecar.configMap.data | toYaml | nindent 2 }}
74+
{{- .Values.inferenceExtension.sidecar.configMapData | toYaml | nindent 2 }}
7575
{{- end }}
7676
---
7777
{{- if .Values.inferenceExtension.latencyPredictor.enabled }}

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,14 @@ spec:
182182
volumeMounts:
183183
- name: plugins-config-volume
184184
mountPath: "/config"
185+
{{- if .Values.inferenceExtension.volumeMounts }}
186+
{{- toYaml .Values.inferenceExtension.volumeMounts | nindent 8 }}
187+
{{- end }}
185188
{{- include "gateway-api-inference-extension.latencyPredictor.containers" . | nindent 6 }}
186189
volumes:
190+
{{- if .Values.inferenceExtension.volumes }}
191+
{{- toYaml .Values.inferenceExtension.volumes | nindent 6 }}
192+
{{- end }}
187193
{{- if .Values.inferenceExtension.sidecar.volumes }}
188194
{{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }}
189195
{{- end }}

go.mod

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,20 @@ require (
1111
github.com/google/go-cmp v0.7.0
1212
github.com/google/uuid v1.6.0
1313
github.com/hashicorp/golang-lru/v2 v2.0.7
14-
github.com/onsi/ginkgo/v2 v2.27.2
15-
github.com/onsi/gomega v1.38.2
14+
github.com/onsi/ginkgo/v2 v2.27.3
15+
github.com/onsi/gomega v1.38.3
1616
github.com/prometheus/client_golang v1.23.2
1717
github.com/prometheus/client_model v0.6.2
1818
github.com/prometheus/common v0.67.4
1919
github.com/prometheus/prometheus v0.307.3
2020
github.com/stretchr/testify v1.11.1
21-
go.opentelemetry.io/otel v1.38.0
21+
go.opentelemetry.io/otel v1.39.0
2222
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
23-
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0
24-
go.opentelemetry.io/otel/sdk v1.38.0
23+
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.39.0
24+
go.opentelemetry.io/otel/sdk v1.39.0
2525
go.uber.org/multierr v1.11.0
2626
go.uber.org/zap v1.27.1
27-
golang.org/x/sync v0.18.0
27+
golang.org/x/sync v0.19.0
2828
google.golang.org/grpc v1.77.0
2929
google.golang.org/protobuf v1.36.10
3030
k8s.io/api v0.34.2
@@ -44,7 +44,7 @@ require (
4444

4545
require (
4646
github.com/go-logr/zapr v1.3.0
47-
github.com/openai/openai-go/v3 v3.10.0
47+
github.com/spf13/pflag v1.0.7
4848
)
4949

5050
require (
@@ -98,18 +98,13 @@ require (
9898
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
9999
github.com/prometheus/procfs v0.17.0 // indirect
100100
github.com/spf13/cobra v1.9.1 // indirect
101-
github.com/spf13/pflag v1.0.7 // indirect
102101
github.com/stoewer/go-strcase v1.3.0 // indirect
103-
github.com/tidwall/gjson v1.18.0 // indirect
104-
github.com/tidwall/match v1.1.1 // indirect
105-
github.com/tidwall/pretty v1.2.1 // indirect
106-
github.com/tidwall/sjson v1.2.5 // indirect
107102
github.com/x448/float16 v0.8.4 // indirect
108103
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
109104
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
110105
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
111-
go.opentelemetry.io/otel/metric v1.38.0 // indirect
112-
go.opentelemetry.io/otel/trace v1.38.0 // indirect
106+
go.opentelemetry.io/otel/metric v1.39.0 // indirect
107+
go.opentelemetry.io/otel/trace v1.39.0 // indirect
113108
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
114109
go.uber.org/atomic v1.11.0 // indirect
115110
go.yaml.in/yaml/v2 v2.4.3 // indirect
@@ -119,7 +114,7 @@ require (
119114
golang.org/x/mod v0.29.0 // indirect
120115
golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect
121116
golang.org/x/oauth2 v0.32.0 // indirect
122-
golang.org/x/sys v0.37.0 // indirect
117+
golang.org/x/sys v0.39.0 // indirect
123118
golang.org/x/term v0.36.0 // indirect
124119
golang.org/x/text v0.30.0 // indirect
125120
golang.org/x/time v0.13.0 // indirect

0 commit comments

Comments
 (0)