@@ -28,7 +28,6 @@ import (
2828
2929 "sigs.k8s.io/controller-runtime/pkg/log"
3030 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
31- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
3231 backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3332 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
3433 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
@@ -57,7 +56,7 @@ type Datastore interface {
5756
5857// Scheduler defines the interface required by the Director for scheduling.
5958type Scheduler interface {
60- Schedule (ctx context.Context , request * schedulingtypes.LLMRequest , candidatePods []schedulingtypes.Pod ) (result * schedulingtypes.SchedulingResult , err error )
59+ Schedule (ctx context.Context , request * schedulingtypes.LLMRequest , candidatePods []schedulingtypes.Endpoint ) (result * schedulingtypes.SchedulingResult , err error )
6160}
6261
6362// NewDirectorWithConfig creates a new Director instance with all dependencies.
@@ -245,34 +244,34 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
245244 return reqCtx , errutil.Error {Code : errutil .Internal , Msg : "results must be greater than zero" }
246245 }
247246 // primary profile is used to set destination
248- targetPods := []* backend. Pod {}
247+ targetMetadatas := []* datalayer. EndpointMetadata {}
249248 targetEndpoints := []string {}
250249
251- for _ , pod := range result .ProfileResults [result .PrimaryProfileName ].TargetPods {
252- curPod := pod .GetPod ()
253- curEndpoint := net .JoinHostPort (curPod .GetIPAddress (), curPod .GetPort ())
254- targetPods = append (targetPods , curPod )
250+ for _ , pod := range result .ProfileResults [result .PrimaryProfileName ].TargetEndpoints {
251+ curMetadata := pod .GetMetadata ()
252+ curEndpoint := net .JoinHostPort (curMetadata .GetIPAddress (), curMetadata .GetPort ())
253+ targetMetadatas = append (targetMetadatas , curMetadata )
255254 targetEndpoints = append (targetEndpoints , curEndpoint )
256255 }
257256
258257 multiEndpointString := strings .Join (targetEndpoints , "," )
259258 logger .V (logutil .VERBOSE ).Info ("Request handled" , "objectiveKey" , reqCtx .ObjectiveKey , "incomingModelName" , reqCtx .IncomingModelName , "targetModel" , reqCtx .TargetModelName , "endpoint" , multiEndpointString )
260259
261- reqCtx .TargetPod = targetPods [0 ]
260+ reqCtx .TargetPod = targetMetadatas [0 ]
262261 reqCtx .TargetEndpoint = multiEndpointString
263262
264263 d .runPreRequestPlugins (ctx , reqCtx .SchedulingRequest , result )
265264
266265 return reqCtx , nil
267266}
268267
269- func (d * Director ) toSchedulerPodMetrics (pods []backendmetrics.PodMetrics ) []schedulingtypes.Pod {
270- pm := make ([]schedulingtypes.Pod , len (pods ))
268+ func (d * Director ) toSchedulerPodMetrics (pods []backendmetrics.PodMetrics ) []schedulingtypes.Endpoint {
269+ pm := make ([]schedulingtypes.Endpoint , len (pods ))
271270 for i , pod := range pods {
272271 if pod .GetAttributes () != nil {
273- pm [i ] = & schedulingtypes.PodMetrics {Pod : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : pod .GetAttributes ().Clone ()}
272+ pm [i ] = & schedulingtypes.PodMetrics {EndpointMetadata : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : pod .GetAttributes ().Clone ()}
274273 } else {
275- pm [i ] = & schedulingtypes.PodMetrics {Pod : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : datalayer .NewAttributes ()}
274+ pm [i ] = & schedulingtypes.PodMetrics {EndpointMetadata : pod .GetMetadata ().Clone (), MetricsState : pod .GetMetrics ().Clone (), AttributeMap : datalayer .NewAttributes ()}
276275 }
277276 }
278277
@@ -323,7 +322,7 @@ func (d *Director) HandleResponseBodyComplete(ctx context.Context, reqCtx *handl
323322 return reqCtx , nil
324323}
325324
326- func (d * Director ) GetRandomPod () * backend. Pod {
325+ func (d * Director ) GetRandomEndpoint () * datalayer. EndpointMetadata {
327326 pods := d .datastore .PodList (datastore .AllPodsPredicate )
328327 if len (pods ) == 0 {
329328 return nil
@@ -346,16 +345,18 @@ func (d *Director) runPreRequestPlugins(ctx context.Context, request *scheduling
346345}
347346
348347func (d * Director ) runPrepareDataPlugins (ctx context.Context ,
349- request * schedulingtypes.LLMRequest , pods []schedulingtypes.Pod ) error {
350- return prepareDataPluginsWithTimeout (prepareDataTimeout , d .requestControlPlugins .prepareDataPlugins , ctx , request , pods )
348+ request * schedulingtypes.LLMRequest , endpoints []schedulingtypes.Endpoint ) error {
349+ return prepareDataPluginsWithTimeout (
350+ prepareDataTimeout , d .requestControlPlugins .prepareDataPlugins , ctx , request , endpoints )
351+
351352}
352353
353354func (d * Director ) runAdmissionPlugins (ctx context.Context ,
354- request * schedulingtypes.LLMRequest , pods []schedulingtypes.Pod ) bool {
355+ request * schedulingtypes.LLMRequest , endpoints []schedulingtypes.Endpoint ) bool {
355356 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
356357 for _ , plugin := range d .requestControlPlugins .admissionPlugins {
357358 loggerDebug .Info ("Running AdmitRequest plugin" , "plugin" , plugin .TypedName ())
358- if denyReason := plugin .AdmitRequest (ctx , request , pods ); denyReason != nil {
359+ if denyReason := plugin .AdmitRequest (ctx , request , endpoints ); denyReason != nil {
359360 loggerDebug .Info ("AdmitRequest plugin denied the request" , "plugin" , plugin .TypedName (), "reason" , denyReason .Error ())
360361 return false
361362 }
@@ -364,34 +365,34 @@ func (d *Director) runAdmissionPlugins(ctx context.Context,
364365 return true
365366}
366367
367- func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
368+ func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
368369 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
369370 for _ , plugin := range d .requestControlPlugins .responseReceivedPlugins {
370371 loggerDebug .Info ("Running ResponseReceived plugin" , "plugin" , plugin .TypedName ())
371372 before := time .Now ()
372- plugin .ResponseReceived (ctx , request , response , targetPod )
373+ plugin .ResponseReceived (ctx , request , response , targetEndpoint )
373374 metrics .RecordPluginProcessingLatency (ResponseReceivedExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
374375 loggerDebug .Info ("Completed running ResponseReceived plugin successfully" , "plugin" , plugin .TypedName ())
375376 }
376377}
377378
378- func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
379+ func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
379380 loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
380381 for _ , plugin := range d .requestControlPlugins .responseStreamingPlugins {
381382 loggerTrace .Info ("Running ResponseStreaming plugin" , "plugin" , plugin .TypedName ())
382383 before := time .Now ()
383- plugin .ResponseStreaming (ctx , request , response , targetPod )
384+ plugin .ResponseStreaming (ctx , request , response , targetEndpoint )
384385 metrics .RecordPluginProcessingLatency (ResponseStreamingExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
385386 loggerTrace .Info ("Completed running ResponseStreaming plugin successfully" , "plugin" , plugin .TypedName ())
386387 }
387388}
388389
389- func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend. Pod ) {
390+ func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetEndpoint * datalayer. EndpointMetadata ) {
390391 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
391392 for _ , plugin := range d .requestControlPlugins .responseCompletePlugins {
392393 loggerDebug .Info ("Running ResponseComplete plugin" , "plugin" , plugin .TypedName ())
393394 before := time .Now ()
394- plugin .ResponseComplete (ctx , request , response , targetPod )
395+ plugin .ResponseComplete (ctx , request , response , targetEndpoint )
395396 metrics .RecordPluginProcessingLatency (ResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
396397 loggerDebug .Info ("Completed running ResponseComplete plugin successfully" , "plugin" , plugin .TypedName ())
397398 }
0 commit comments