kubernetes-sigs
diff --git a/pkg/epp/backend/metrics/fake.go b/pkg/epp/backend/metrics/fake.go
Lines changed: 5 additions & 6 deletions
diff --git a/pkg/epp/backend/metrics/metrics.go b/pkg/epp/backend/metrics/metrics.go
Lines changed: 7 additions & 7 deletions
diff --git a/pkg/epp/backend/metrics/metrics_test.go b/pkg/epp/backend/metrics/metrics_test.go
Lines changed: 3 additions & 3 deletions
diff --git a/pkg/epp/config/loader/configloader_test.go b/pkg/epp/config/loader/configloader_test.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go
Lines changed: 5 additions & 5 deletions
diff --git a/pkg/epp/handlers/response_test.go b/pkg/epp/handlers/response_test.go
Lines changed: 3 additions & 3 deletions
diff --git a/pkg/epp/handlers/server.go b/pkg/epp/handlers/server.go
Lines changed: 2 additions & 3 deletions
diff --git a/pkg/epp/requestcontrol/dag_test.go b/pkg/epp/requestcontrol/dag_test.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go
Lines changed: 24 additions & 23 deletions
@@ -25,14 +25,13 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
 // FakePodMetrics is an implementation of PodMetrics that doesn't run the async refresh loop.
 type FakePodMetrics struct {
-	Pod        *backend.Pod
+	Metadata   *datalayer.EndpointMetadata
 	Metrics    *MetricsState
 	Attributes *datalayer.Attributes
 }
@@ -41,16 +40,16 @@ func (fpm *FakePodMetrics) String() string {
 	return fmt.Sprintf("Metadata: %v; Metrics: %v", fpm.GetMetadata(), fpm.GetMetrics())
 }
 
-func (fpm *FakePodMetrics) GetMetadata() *backend.Pod {
-	return fpm.Pod
+func (fpm *FakePodMetrics) GetMetadata() *datalayer.EndpointMetadata {
+	return fpm.Metadata
 }
 
 func (fpm *FakePodMetrics) GetMetrics() *MetricsState {
 	return fpm.Metrics
 }
 
 func (fpm *FakePodMetrics) UpdateMetadata(metadata *datalayer.EndpointMetadata) {
-	fpm.Pod = metadata
+	fpm.Metadata = metadata
 }
 func (fpm *FakePodMetrics) GetAttributes() *datalayer.Attributes {
 	return fpm.Attributes
@@ -72,7 +71,7 @@ type FakePodMetricsClient struct {
 	Res   map[types.NamespacedName]*MetricsState
 }
 
-func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState) (*MetricsState, error) {
+func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *datalayer.EndpointMetadata, existing *MetricsState) (*MetricsState, error) {
 	f.errMu.RLock()
 	err, ok := f.Err[pod.NamespacedName]
 	f.errMu.RUnlock()
 
@@ -28,7 +28,7 @@ import (
 	"github.com/prometheus/common/model"
 	"go.uber.org/multierr"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 )
 
 const (
@@ -50,22 +50,22 @@ type PodMetricsClientImpl struct {
 }
 
 // FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
-func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState) (*MetricsState, error) {
-	url := p.getMetricEndpoint(pod)
+func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, metadata *datalayer.EndpointMetadata, existing *MetricsState) (*MetricsState, error) {
+	url := p.getMetricEndpoint(metadata)
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create request: %v", err)
 	}
 	resp, err := p.Client.Do(req)
 	if err != nil {
-		return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)
+		return nil, fmt.Errorf("failed to fetch metrics from %s: %w", metadata.NamespacedName, err)
 	}
 	defer func() {
 		_ = resp.Body.Close()
 	}()
 
 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("unexpected status code from %s: %v", pod.NamespacedName, resp.StatusCode)
+		return nil, fmt.Errorf("unexpected status code from %s: %v", metadata.NamespacedName, resp.StatusCode)
 	}
 
 	parser := expfmt.NewTextParser(model.LegacyValidation)
@@ -76,8 +76,8 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
 	return p.promToPodMetrics(metricFamilies, existing)
 }
 
-func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod) string {
-	return p.ModelServerMetricsScheme + "://" + pod.GetMetricsHost() + p.ModelServerMetricsPath
+func (p *PodMetricsClientImpl) getMetricEndpoint(metadata *datalayer.EndpointMetadata) string {
+	return p.ModelServerMetricsScheme + "://" + metadata.GetMetricsHost() + p.ModelServerMetricsPath
 }
 
 // promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
 
@@ -31,7 +31,7 @@ import (
 	"google.golang.org/protobuf/proto"
 	"k8s.io/apimachinery/pkg/types"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
@@ -577,7 +577,7 @@ func TestPromToPodMetrics(t *testing.T) {
 // there's no server running on the specified port.
 func TestFetchMetrics(t *testing.T) {
 	ctx := logutil.NewTestLoggerIntoContext(context.Background())
-	pod := &backend.Pod{
+	metadata := &datalayer.EndpointMetadata{
 		Address:     "127.0.0.1",
 		Port:        "9999",
 		MetricsHost: "127.0.0.1:9999",
@@ -594,7 +594,7 @@ func TestFetchMetrics(t *testing.T) {
 		Client:                   http.DefaultClient,
 	}
 
-	_, err := p.FetchMetrics(ctx, pod, existing) // Use a port that's unlikely to be in use
+	_, err := p.FetchMetrics(ctx, metadata, existing) // Use a port that's unlikely to be in use
 	if err == nil {
 		t.Errorf("FetchMetrics() expected error, got nil")
 	}
 
@@ -408,14 +408,14 @@ func (m *mockPlugin) TypedName() plugins.TypedName { return m.t }
 // Mock Scorer
 type mockScorer struct{ mockPlugin }
 
-func (m *mockScorer) Score(context.Context, *types.CycleState, *types.LLMRequest, []types.Pod) map[types.Pod]float64 {
+func (m *mockScorer) Score(context.Context, *types.CycleState, *types.LLMRequest, []types.Endpoint) map[types.Endpoint]float64 {
 	return nil
 }
 
 // Mock Picker
 type mockPicker struct{ mockPlugin }
 
-func (m *mockPicker) Pick(context.Context, *types.CycleState, []*types.ScoredPod) *types.ProfileRunResult {
+func (m *mockPicker) Pick(context.Context, *types.CycleState, []*types.ScoredEndpoint) *types.ProfileRunResult {
 	return nil
 }
 
 
@@ -40,15 +40,15 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP
 
 	// an EoS in the request headers means this request has no body or trailers.
 	if req.RequestHeaders.EndOfStream {
-		// We will route this request to a random pod as this is assumed to just be a GET
+		// We will route this request to a random endpoint as this is assumed to just be a GET
 		// More context: https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/526
 		// The above PR will address endpoint admission, but currently any request without a body will be
-		// routed to a random upstream pod.
-		pod := s.director.GetRandomPod()
-		if pod == nil {
+		// routed to a random upstream endpoint.
+		endpoint := s.director.GetRandomEndpoint()
+		if endpoint == nil {
 			return errutil.Error{Code: errutil.Internal, Msg: "no pods available in datastore"}
 		}
-		reqCtx.TargetEndpoint = pod.GetIPAddress() + ":" + pod.GetPort()
+		reqCtx.TargetEndpoint = endpoint.GetIPAddress() + ":" + endpoint.GetPort()
 		reqCtx.RequestSize = 0
 		reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx)
 		return nil
 
@@ -23,7 +23,7 @@ import (
 
 	"github.com/google/go-cmp/cmp"
 
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
@@ -103,8 +103,8 @@ func (m *mockDirector) HandleResponseReceived(ctx context.Context, reqCtx *Reque
 func (m *mockDirector) HandlePreRequest(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error) {
 	return reqCtx, nil
 }
-func (m *mockDirector) GetRandomPod() *backend.Pod {
-	return &backend.Pod{}
+func (m *mockDirector) GetRandomEndpoint() *datalayer.EndpointMetadata {
+	return &datalayer.EndpointMetadata{}
 }
 func (m *mockDirector) HandleRequest(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error) {
 	return reqCtx, nil
 
@@ -31,7 +31,6 @@ import (
 	"google.golang.org/grpc/status"
 
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
 	schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
@@ -57,7 +56,7 @@ type Director interface {
 	HandleResponseReceived(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error)
 	HandleResponseBodyStreaming(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error)
 	HandleResponseBodyComplete(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error)
-	GetRandomPod() *backend.Pod
+	GetRandomEndpoint() *datalayer.EndpointMetadata
 }
 
 type Datastore interface {
@@ -76,7 +75,7 @@ type StreamingServer struct {
 // Specifically, there are fields related to the ext-proc protocol, and then fields related to the lifecycle of the request.
 // We should split these apart as this monolithic object exposes too much data to too many layers.
 type RequestContext struct {
-	TargetPod                 *backend.Pod
+	TargetPod                 *datalayer.EndpointMetadata
 	TargetEndpoint            string
 	IncomingModelName         string
 	TargetModelName           string
 
@@ -45,8 +45,8 @@ func (m *mockPrepareRequestDataP) Consumes() map[string]any {
 	return m.consumes
 }
 
-func (m *mockPrepareRequestDataP) PrepareRequestData(ctx context.Context, request *types.LLMRequest, pods []types.Pod) error {
-	pods[0].Put(mockProducedDataKey, mockProducedDataType{value: 42})
+func (m *mockPrepareRequestDataP) PrepareRequestData(ctx context.Context, request *types.LLMRequest, endpoints []types.Endpoint) error {
+	endpoints[0].Put(mockProducedDataKey, mockProducedDataType{value: 42})
 	return nil
 }
 
 
@@ -28,7 +28,6 @@ import (
 
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
@@ -57,7 +56,7 @@ type Datastore interface {
 
 // Scheduler defines the interface required by the Director for scheduling.
 type Scheduler interface {
-	Schedule(ctx context.Context, request *schedulingtypes.LLMRequest, candidatePods []schedulingtypes.Pod) (result *schedulingtypes.SchedulingResult, err error)
+	Schedule(ctx context.Context, request *schedulingtypes.LLMRequest, candidatePods []schedulingtypes.Endpoint) (result *schedulingtypes.SchedulingResult, err error)
 }
 
 // NewDirectorWithConfig creates a new Director instance with all dependencies.
@@ -245,34 +244,34 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
 		return reqCtx, errutil.Error{Code: errutil.Internal, Msg: "results must be greater than zero"}
 	}
 	// primary profile is used to set destination
-	targetPods := []*backend.Pod{}
+	targetMetadatas := []*datalayer.EndpointMetadata{}
 	targetEndpoints := []string{}
 
-	for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods {
-		curPod := pod.GetPod()
-		curEndpoint := net.JoinHostPort(curPod.GetIPAddress(), curPod.GetPort())
-		targetPods = append(targetPods, curPod)
+	for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetEndpoints {
+		curMetadata := pod.GetMetadata()
+		curEndpoint := net.JoinHostPort(curMetadata.GetIPAddress(), curMetadata.GetPort())
+		targetMetadatas = append(targetMetadatas, curMetadata)
 		targetEndpoints = append(targetEndpoints, curEndpoint)
 	}
 
 	multiEndpointString := strings.Join(targetEndpoints, ",")
 	logger.V(logutil.VERBOSE).Info("Request handled", "objectiveKey", reqCtx.ObjectiveKey, "incomingModelName", reqCtx.IncomingModelName, "targetModel", reqCtx.TargetModelName, "endpoint", multiEndpointString)
 
-	reqCtx.TargetPod = targetPods[0]
+	reqCtx.TargetPod = targetMetadatas[0]
 	reqCtx.TargetEndpoint = multiEndpointString
 
 	d.runPreRequestPlugins(ctx, reqCtx.SchedulingRequest, result)
 
 	return reqCtx, nil
 }
 
-func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []schedulingtypes.Pod {
-	pm := make([]schedulingtypes.Pod, len(pods))
+func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []schedulingtypes.Endpoint {
+	pm := make([]schedulingtypes.Endpoint, len(pods))
 	for i, pod := range pods {
 		if pod.GetAttributes() != nil {
-			pm[i] = &schedulingtypes.PodMetrics{Pod: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: pod.GetAttributes().Clone()}
+			pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: pod.GetAttributes().Clone()}
 		} else {
-			pm[i] = &schedulingtypes.PodMetrics{Pod: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: datalayer.NewAttributes()}
+			pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: datalayer.NewAttributes()}
 		}
 	}
 
@@ -323,7 +322,7 @@ func (d *Director) HandleResponseBodyComplete(ctx context.Context, reqCtx *handl
 	return reqCtx, nil
 }
 
-func (d *Director) GetRandomPod() *backend.Pod {
+func (d *Director) GetRandomEndpoint() *datalayer.EndpointMetadata {
 	pods := d.datastore.PodList(datastore.AllPodsPredicate)
 	if len(pods) == 0 {
 		return nil
@@ -346,16 +345,18 @@ func (d *Director) runPreRequestPlugins(ctx context.Context, request *scheduling
 }
 
 func (d *Director) runPrepareDataPlugins(ctx context.Context,
-	request *schedulingtypes.LLMRequest, pods []schedulingtypes.Pod) error {
-	return prepareDataPluginsWithTimeout(prepareDataTimeout, d.requestControlPlugins.prepareDataPlugins, ctx, request, pods)
+	request *schedulingtypes.LLMRequest, endpoints []schedulingtypes.Endpoint) error {
+	return prepareDataPluginsWithTimeout(
+		prepareDataTimeout, d.requestControlPlugins.prepareDataPlugins, ctx, request, endpoints)
+
 }
 
 func (d *Director) runAdmissionPlugins(ctx context.Context,
-	request *schedulingtypes.LLMRequest, pods []schedulingtypes.Pod) bool {
+	request *schedulingtypes.LLMRequest, endpoints []schedulingtypes.Endpoint) bool {
 	loggerDebug := log.FromContext(ctx).V(logutil.DEBUG)
 	for _, plugin := range d.requestControlPlugins.admissionPlugins {
 		loggerDebug.Info("Running AdmitRequest plugin", "plugin", plugin.TypedName())
-		if denyReason := plugin.AdmitRequest(ctx, request, pods); denyReason != nil {
+		if denyReason := plugin.AdmitRequest(ctx, request, endpoints); denyReason != nil {
 			loggerDebug.Info("AdmitRequest plugin denied the request", "plugin", plugin.TypedName(), "reason", denyReason.Error())
 			return false
 		}
@@ -364,34 +365,34 @@ func (d *Director) runAdmissionPlugins(ctx context.Context,
 	return true
 }
 
-func (d *Director) runResponseReceivedPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetPod *backend.Pod) {
+func (d *Director) runResponseReceivedPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetEndpoint *datalayer.EndpointMetadata) {
 	loggerDebug := log.FromContext(ctx).V(logutil.DEBUG)
 	for _, plugin := range d.requestControlPlugins.responseReceivedPlugins {
 		loggerDebug.Info("Running ResponseReceived plugin", "plugin", plugin.TypedName())
 		before := time.Now()
-		plugin.ResponseReceived(ctx, request, response, targetPod)
+		plugin.ResponseReceived(ctx, request, response, targetEndpoint)
 		metrics.RecordPluginProcessingLatency(ResponseReceivedExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before))
 		loggerDebug.Info("Completed running ResponseReceived plugin successfully", "plugin", plugin.TypedName())
 	}
 }
 
-func (d *Director) runResponseStreamingPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetPod *backend.Pod) {
+func (d *Director) runResponseStreamingPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetEndpoint *datalayer.EndpointMetadata) {
 	loggerTrace := log.FromContext(ctx).V(logutil.TRACE)
 	for _, plugin := range d.requestControlPlugins.responseStreamingPlugins {
 		loggerTrace.Info("Running ResponseStreaming plugin", "plugin", plugin.TypedName())
 		before := time.Now()
-		plugin.ResponseStreaming(ctx, request, response, targetPod)
+		plugin.ResponseStreaming(ctx, request, response, targetEndpoint)
 		metrics.RecordPluginProcessingLatency(ResponseStreamingExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before))
 		loggerTrace.Info("Completed running ResponseStreaming plugin successfully", "plugin", plugin.TypedName())
 	}
 }
 
-func (d *Director) runResponseCompletePlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetPod *backend.Pod) {
+func (d *Director) runResponseCompletePlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetEndpoint *datalayer.EndpointMetadata) {
 	loggerDebug := log.FromContext(ctx).V(logutil.DEBUG)
 	for _, plugin := range d.requestControlPlugins.responseCompletePlugins {
 		loggerDebug.Info("Running ResponseComplete plugin", "plugin", plugin.TypedName())
 		before := time.Now()
-		plugin.ResponseComplete(ctx, request, response, targetPod)
+		plugin.ResponseComplete(ctx, request, response, targetEndpoint)
 		metrics.RecordPluginProcessingLatency(ResponseCompleteExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before))
 		loggerDebug.Info("Completed running ResponseComplete plugin successfully", "plugin", plugin.TypedName())
 	}
Original file line number	Diff line number	Diff line change
`@@ -25,14 +25,13 @@ import (`
`25`	`25`	`"k8s.io/apimachinery/pkg/types"`
`26`	`26`	`"sigs.k8s.io/controller-runtime/pkg/log"`
`27`	`27`
`28`		`- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"`
`29`	`28`	`"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"`
`30`	`29`	`logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"`
`31`	`30`	`)`
`32`	`31`
`33`	`32`	`// FakePodMetrics is an implementation of PodMetrics that doesn't run the async refresh loop.`
`34`	`33`	`type FakePodMetrics struct {`
`35`		`- Pod *backend.Pod`
	`34`	`+ Metadata *datalayer.EndpointMetadata`
`36`	`35`	`Metrics *MetricsState`
`37`	`36`	`Attributes *datalayer.Attributes`
`38`	`37`	`}`
`@@ -41,16 +40,16 @@ func (fpm *FakePodMetrics) String() string {`
`41`	`40`	`return fmt.Sprintf("Metadata: %v; Metrics: %v", fpm.GetMetadata(), fpm.GetMetrics())`
`42`	`41`	`}`
`43`	`42`
`44`		`-func (fpm *FakePodMetrics) GetMetadata() *backend.Pod {`
`45`		`- return fpm.Pod`
	`43`	`+func (fpm *FakePodMetrics) GetMetadata() *datalayer.EndpointMetadata {`
	`44`	`+ return fpm.Metadata`
`46`	`45`	`}`
`47`	`46`
`48`	`47`	`func (fpm *FakePodMetrics) GetMetrics() *MetricsState {`
`49`	`48`	`return fpm.Metrics`
`50`	`49`	`}`
`51`	`50`
`52`	`51`	`func (fpm *FakePodMetrics) UpdateMetadata(metadata *datalayer.EndpointMetadata) {`
`53`		`- fpm.Pod = metadata`
	`52`	`+ fpm.Metadata = metadata`
`54`	`53`	`}`
`55`	`54`	`func (fpm *FakePodMetrics) GetAttributes() *datalayer.Attributes {`
`56`	`55`	`return fpm.Attributes`
`@@ -72,7 +71,7 @@ type FakePodMetricsClient struct {`
`72`	`71`	`Res map[types.NamespacedName]*MetricsState`
`73`	`72`	`}`
`74`	`73`
`75`		`-func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState) (*MetricsState, error) {`
	`74`	`+func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *datalayer.EndpointMetadata, existing *MetricsState) (*MetricsState, error) {`
`76`	`75`	`f.errMu.RLock()`
`77`	`76`	`err, ok := f.Err[pod.NamespacedName]`
`78`	`77`	`f.errMu.RUnlock()`
Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,7 @@ import (`
`28`	`28`	`"github.com/prometheus/common/model"`
`29`	`29`	`"go.uber.org/multierr"`
`30`	`30`
`31`		`- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"`
	`31`	`+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"`
`32`	`32`	`)`
`33`	`33`
`34`	`34`	`const (`
`@@ -50,22 +50,22 @@ type PodMetricsClientImpl struct {`
`50`	`50`	`}`
`51`	`51`
`52`	`52`	`// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.`
`53`		`-func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState) (*MetricsState, error) {`
`54`		`- url := p.getMetricEndpoint(pod)`
	`53`	`+func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, metadata *datalayer.EndpointMetadata, existing *MetricsState) (*MetricsState, error) {`
	`54`	`+ url := p.getMetricEndpoint(metadata)`
`55`	`55`	`req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)`
`56`	`56`	`if err != nil {`
`57`	`57`	`return nil, fmt.Errorf("failed to create request: %v", err)`
`58`	`58`	`}`
`59`	`59`	`resp, err := p.Client.Do(req)`
`60`	`60`	`if err != nil {`
`61`		`- return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)`
	`61`	`+ return nil, fmt.Errorf("failed to fetch metrics from %s: %w", metadata.NamespacedName, err)`
`62`	`62`	`}`
`63`	`63`	`defer func() {`
`64`	`64`	`_ = resp.Body.Close()`
`65`	`65`	`}()`
`66`	`66`
`67`	`67`	`if resp.StatusCode != http.StatusOK {`
`68`		`- return nil, fmt.Errorf("unexpected status code from %s: %v", pod.NamespacedName, resp.StatusCode)`
	`68`	`+ return nil, fmt.Errorf("unexpected status code from %s: %v", metadata.NamespacedName, resp.StatusCode)`
`69`	`69`	`}`
`70`	`70`
`71`	`71`	`parser := expfmt.NewTextParser(model.LegacyValidation)`
`@@ -76,8 +76,8 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po`
`76`	`76`	`return p.promToPodMetrics(metricFamilies, existing)`
`77`	`77`	`}`
`78`	`78`
`79`		`-func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod) string {`
`80`		`- return p.ModelServerMetricsScheme + "://" + pod.GetMetricsHost() + p.ModelServerMetricsPath`
	`79`	`+func (p *PodMetricsClientImpl) getMetricEndpoint(metadata *datalayer.EndpointMetadata) string {`
	`80`	`+ return p.ModelServerMetricsScheme + "://" + metadata.GetMetricsHost() + p.ModelServerMetricsPath`
`81`	`81`	`}`
`82`	`82`
`83`	`83`	`// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.`
Original file line number	Diff line number	Diff line change
`@@ -408,14 +408,14 @@ func (m *mockPlugin) TypedName() plugins.TypedName { return m.t }`
`408`	`408`	`// Mock Scorer`
`409`	`409`	`type mockScorer struct{ mockPlugin }`
`410`	`410`
`411`		`-func (m *mockScorer) Score(context.Context, *types.CycleState, *types.LLMRequest, []types.Pod) map[types.Pod]float64 {`
	`411`	`+func (m *mockScorer) Score(context.Context, *types.CycleState, *types.LLMRequest, []types.Endpoint) map[types.Endpoint]float64 {`
`412`	`412`	`return nil`
`413`	`413`	`}`
`414`	`414`
`415`	`415`	`// Mock Picker`
`416`	`416`	`type mockPicker struct{ mockPlugin }`
`417`	`417`
`418`		`-func (m *mockPicker) Pick(context.Context, *types.CycleState, []*types.ScoredPod) *types.ProfileRunResult {`
	`418`	`+func (m *mockPicker) Pick(context.Context, *types.CycleState, []*types.ScoredEndpoint) *types.ProfileRunResult {`
`419`	`419`	`return nil`
`420`	`420`	`}`
`421`	`421`
Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ import (`
`23`	`23`
`24`	`24`	`"github.com/google/go-cmp/cmp"`
`25`	`25`
`26`		`- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"`
	`26`	`+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"`
`27`	`27`	`logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"`
`28`	`28`	`)`
`29`	`29`
`@@ -103,8 +103,8 @@ func (m *mockDirector) HandleResponseReceived(ctx context.Context, reqCtx *Reque`
`103`	`103`	`func (m *mockDirector) HandlePreRequest(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error) {`
`104`	`104`	`return reqCtx, nil`
`105`	`105`	`}`
`106`		`-func (m *mockDirector) GetRandomPod() *backend.Pod {`
`107`		`- return &backend.Pod{}`
	`106`	`+func (m *mockDirector) GetRandomEndpoint() *datalayer.EndpointMetadata {`
	`107`	`+ return &datalayer.EndpointMetadata{}`
`108`	`108`	`}`
`109`	`109`	`func (m *mockDirector) HandleRequest(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error) {`
`110`	`110`	`return reqCtx, nil`
Original file line number	Diff line number	Diff line change
`@@ -45,8 +45,8 @@ func (m *mockPrepareRequestDataP) Consumes() map[string]any {`
`45`	`45`	`return m.consumes`
`46`	`46`	`}`
`47`	`47`
`48`		`-func (m *mockPrepareRequestDataP) PrepareRequestData(ctx context.Context, request *types.LLMRequest, pods []types.Pod) error {`
`49`		`- pods[0].Put(mockProducedDataKey, mockProducedDataType{value: 42})`
	`48`	`+func (m *mockPrepareRequestDataP) PrepareRequestData(ctx context.Context, request *types.LLMRequest, endpoints []types.Endpoint) error {`
	`49`	`+ endpoints[0].Put(mockProducedDataKey, mockProducedDataType{value: 42})`
`50`	`50`	`return nil`
`51`	`51`	`}`
`52`	`52`