@@ -28,7 +28,6 @@ import (
2828
2929 "sigs.k8s.io/controller-runtime/pkg/log"
3030 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
31- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
3231 backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3332 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
3433 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
@@ -57,7 +56,7 @@ type Datastore interface {
5756
5857// Scheduler defines the interface required by the Director for scheduling.
5958type Scheduler interface {
60- Schedule (ctx context.Context , request * schedulingtypes.LLMRequest , candidatePods []schedulingtypes.Pod ) (result * schedulingtypes.SchedulingResult , err error )
59+ Schedule (ctx context.Context , request * schedulingtypes.LLMRequest , candidatePods []schedulingtypes.Endpoint ) (result * schedulingtypes.SchedulingResult , err error )
6160}
6261
6362// NewDirectorWithConfig creates a new Director instance with all dependencies.
@@ -243,34 +242,34 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
243242 return reqCtx , errutil.Error {Code : errutil .Internal , Msg : "results must be greater than zero" }
244243 }
245244 // primary profile is used to set destination
246- targetPods := []* backend. Pod {}
245+ targetMetadatas := []* datalayer. EndpointMetadata {}
247246 targetEndpoints := []string {}
248247
249- for _ , pod := range result .ProfileResults [result .PrimaryProfileName ].TargetPods {
250- curPod := pod .GetPod ()
251- curEndpoint := net .JoinHostPort (curPod .GetIPAddress (), curPod .GetPort ())
252- targetPods = append (targetPods , curPod )
248+ for _ , pod := range result .ProfileResults [result .PrimaryProfileName ].TargetEndpoints {
249+ curMetadata := pod .GetMetadata ()
250+ curEndpoint := net .JoinHostPort (curMetadata .GetIPAddress (), curMetadata .GetPort ())
251+ targetMetadatas = append (targetMetadatas , curMetadata )
253252 targetEndpoints = append (targetEndpoints , curEndpoint )
254253 }
255254
256255 multiEndpointString := strings .Join (targetEndpoints , "," )
257256 logger .V (logutil .VERBOSE ).Info ("Request handled" , "objectiveKey" , reqCtx .ObjectiveKey , "incomingModelName" , reqCtx .IncomingModelName , "targetModel" , reqCtx .TargetModelName , "endpoint" , multiEndpointString )
258257
259- reqCtx .TargetPod = targetPods [0 ]
258+ reqCtx .TargetPod = targetMetadatas [0 ]
260259 reqCtx .TargetEndpoint = multiEndpointString
261260
262261 d .runPreRequestPlugins (ctx , reqCtx .SchedulingRequest , result )
263262
264263 return reqCtx , nil
265264}
266265
267- func (d * Director ) toSchedulerPodMetrics (pods []backendmetrics.PodMetrics ) []schedulingtypes.Pod {
268- pm := make ([]schedulingtypes.Pod , len (pods ))
266+ func (d * Director ) toSchedulerPodMetrics (pods []backendmetrics.PodMetrics ) []schedulingtypes.Endpoint {
267+ pm := make ([]schedulingtypes.Endpoint , len (pods ))
269268 for i , pod := range pods {
270269 if pod .GetAttributes () != nil {
271- pm [i ] = & schedulingtypes.PodMetrics {Pod : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : pod .GetAttributes ().Clone ()}
270+ pm [i ] = & schedulingtypes.PodMetrics {EndpointMetadata : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : pod .GetAttributes ().Clone ()}
272271 } else {
273- pm [i ] = & schedulingtypes.PodMetrics {Pod : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : datalayer .NewAttributes ()}
272+ pm [i ] = & schedulingtypes.PodMetrics {EndpointMetadata : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : datalayer .NewAttributes ()}
274273 }
275274 }
276275
@@ -321,7 +320,7 @@ func (d *Director) HandleResponseBodyComplete(ctx context.Context, reqCtx *handl
321320 return reqCtx , nil
322321}
323322
324- func (d * Director ) GetRandomPod () * backend. Pod {
323+ func (d * Director ) GetRandomEndpoint () * datalayer. EndpointMetadata {
325324 pods := d .datastore .PodList (datastore .AllPodsPredicate )
326325 if len (pods ) == 0 {
327326 return nil
@@ -344,16 +343,18 @@ func (d *Director) runPreRequestPlugins(ctx context.Context, request *scheduling
344343}
345344
346345func (d * Director ) runPrepareDataPlugins (ctx context.Context ,
347- request * schedulingtypes.LLMRequest , pods []schedulingtypes.Pod ) error {
348- return prepareDataPluginsWithTimeout (prepareDataTimeout , d .requestControlPlugins .prepareDataPlugins , ctx , request , pods )
346+ request * schedulingtypes.LLMRequest , endpoints []schedulingtypes.Endpoint ) error {
347+ return prepareDataPluginsWithTimeout (
348+ prepareDataTimeout , d .requestControlPlugins .prepareDataPlugins , ctx , request , endpoints )
349+
349350}
350351
351352func (d * Director ) runAdmissionPlugins (ctx context.Context ,
352- request * schedulingtypes.LLMRequest , pods []schedulingtypes.Pod ) bool {
353+ request * schedulingtypes.LLMRequest , endpoints []schedulingtypes.Endpoint ) bool {
353354 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
354355 for _ , plugin := range d .requestControlPlugins .admissionPlugins {
355356 loggerDebug .Info ("Running AdmitRequest plugin" , "plugin" , plugin .TypedName ())
356- if denyReason := plugin .AdmitRequest (ctx , request , pods ); denyReason != nil {
357+ if denyReason := plugin .AdmitRequest (ctx , request , endpoints ); denyReason != nil {
357358 loggerDebug .Info ("AdmitRequest plugin denied the request" , "plugin" , plugin .TypedName (), "reason" , denyReason .Error ())
358359 return false
359360 }
@@ -362,34 +363,34 @@ func (d *Director) runAdmissionPlugins(ctx context.Context,
362363 return true
363364}
364365
365- func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
366+ func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
366367 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
367368 for _ , plugin := range d .requestControlPlugins .responseReceivedPlugins {
368369 loggerDebug .Info ("Running ResponseReceived plugin" , "plugin" , plugin .TypedName ())
369370 before := time .Now ()
370- plugin .ResponseReceived (ctx , request , response , targetPod )
371+ plugin .ResponseReceived (ctx , request , response , targetEndpoint )
371372 metrics .RecordPluginProcessingLatency (ResponseReceivedExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
372373 loggerDebug .Info ("Completed running ResponseReceived plugin successfully" , "plugin" , plugin .TypedName ())
373374 }
374375}
375376
376- func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
377+ func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
377378 loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
378379 for _ , plugin := range d .requestControlPlugins .responseStreamingPlugins {
379380 loggerTrace .Info ("Running ResponseStreaming plugin" , "plugin" , plugin .TypedName ())
380381 before := time .Now ()
381- plugin .ResponseStreaming (ctx , request , response , targetPod )
382+ plugin .ResponseStreaming (ctx , request , response , targetEndpoint )
382383 metrics .RecordPluginProcessingLatency (ResponseStreamingExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
383384 loggerTrace .Info ("Completed running ResponseStreaming plugin successfully" , "plugin" , plugin .TypedName ())
384385 }
385386}
386387
387- func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
388+ func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
388389 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
389390 for _ , plugin := range d .requestControlPlugins .responseCompletePlugins {
390391 loggerDebug .Info ("Running ResponseComplete plugin" , "plugin" , plugin .TypedName ())
391392 before := time .Now ()
392- plugin .ResponseComplete (ctx , request , response , targetPod )
393+ plugin .ResponseComplete (ctx , request , response , targetEndpoint )
393394 metrics .RecordPluginProcessingLatency (ResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
394395 loggerDebug .Info ("Completed running ResponseComplete plugin successfully" , "plugin" , plugin .TypedName ())
395396 }
0 commit comments