Added resource group isolation to DeploymentCache (#103)

* Added resource group to DeploymentCache * PMD * Matthias' suggestions --------- Co-authored-by: Matthias Kuhr <[email protected]>
SAP · Oct 18, 2024 · 759fff0 · 759fff0
1 parent a54ca59
commit 759fff0
Show file tree

Hide file tree

Showing 5 changed files with 130 additions and 113 deletions.
diff --git a/core/src/main/java/com/sap/ai/sdk/core/AiCoreService.java b/core/src/main/java/com/sap/ai/sdk/core/AiCoreService.java
@@ -210,4 +210,15 @@ protected ApiClient getApiClient(@Nonnull final Destination destination) {
 
     return new ApiClient(rt).setBasePath(destination.asHttp().getUri().toString());
   }
+
+  /**
+   * Remove all entries from the cache then load all deployments into the cache.
+   *
+   * <p><b>Call this whenever a deployment is deleted.</b>
+   *
+   * @param resourceGroup the resource group of the deleted deployment, usually "default".
+   */
+  public void reloadCachedDeployments(@Nonnull final String resourceGroup) {
+    DEPLOYMENT_CACHE.resetCache(client(), resourceGroup);
+  }
 }
diff --git a/core/src/main/java/com/sap/ai/sdk/core/DeploymentCache.java b/core/src/main/java/com/sap/ai/sdk/core/DeploymentCache.java
@@ -9,6 +9,7 @@
 import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import javax.annotation.Nonnull;
 import lombok.extern.slf4j.Slf4j;
 
@@ -20,7 +21,7 @@
 class DeploymentCache {
 
   /** Cache for deployment ids. The key is the model name and the value is the deployment id. */
-  protected final Set<AiDeployment> CACHE = new HashSet<>();
+  private final Map<String, Set<AiDeployment>> CACHE = new ConcurrentHashMap<>();
 
   /**
    * Remove all entries from the cache then load all deployments into the cache.
@@ -30,32 +31,12 @@ class DeploymentCache {
    * @param client the API client to query deployments.
    * @param resourceGroup the resource group, usually "default".
    */
-  public void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
-    clearCache();
-    loadCache(client, resourceGroup);
-  }
-
-  /**
-   * Remove all entries from the cache.
-   *
-   * <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
-   */
-  protected void clearCache() {
-    CACHE.clear();
-  }
-
-  /**
-   * Load all deployments into the cache
-   *
-   * <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
-   *
-   * @param client the API client to query deployments.
-   * @param resourceGroup the resource group, usually "default".
-   */
-  protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
+  void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
+    CACHE.remove(resourceGroup);
     try {
-      final var deployments = new DeploymentApi(client).query(resourceGroup).getResources();
-      CACHE.addAll(deployments);
+      final var deployments =
+          new HashSet<>(new DeploymentApi(client).query(resourceGroup).getResources());
+      CACHE.put(resourceGroup, deployments);
     } catch (final OpenApiRequestException e) {
       log.error("Failed to load deployments into cache", e);
     }
@@ -72,25 +53,26 @@ protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String
    * @throws NoSuchElementException if no running deployment is found for the model.
    */
   @Nonnull
-  public String getDeploymentIdByModel(
+  String getDeploymentIdByModel(
       @Nonnull final ApiClient client,
       @Nonnull final String resourceGroup,
       @Nonnull final String modelName)
       throws NoSuchElementException {
-    return getDeploymentIdByModel(modelName)
+    return getDeploymentIdByModel(resourceGroup, modelName)
         .orElseGet(
             () -> {
               resetCache(client, resourceGroup);
-              return getDeploymentIdByModel(modelName)
+              return getDeploymentIdByModel(resourceGroup, modelName)
                   .orElseThrow(
                       () ->
                           new NoSuchElementException(
                               "No running deployment found for model: " + modelName));
             });
   }
 
-  private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName) {
-    return CACHE.stream()
+  private Optional<String> getDeploymentIdByModel(
+      @Nonnull final String resourceGroup, @Nonnull final String modelName) {
+    return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
         .filter(deployment -> isDeploymentOfModel(modelName, deployment))
         .findFirst()
         .map(AiDeployment::getId);
@@ -107,25 +89,26 @@ private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName)
    * @throws NoSuchElementException if no running deployment is found for the scenario.
    */
   @Nonnull
-  public String getDeploymentIdByScenario(
+  String getDeploymentIdByScenario(
       @Nonnull final ApiClient client,
       @Nonnull final String resourceGroup,
       @Nonnull final String scenarioId)
       throws NoSuchElementException {
-    return getDeploymentIdByScenario(scenarioId)
+    return getDeploymentIdByScenario(resourceGroup, scenarioId)
         .orElseGet(
             () -> {
               resetCache(client, resourceGroup);
-              return getDeploymentIdByScenario(scenarioId)
+              return getDeploymentIdByScenario(resourceGroup, scenarioId)
                   .orElseThrow(
                       () ->
                           new NoSuchElementException(
                               "No running deployment found for scenario: " + scenarioId));
             });
   }
 
-  private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenarioId) {
-    return CACHE.stream()
+  private Optional<String> getDeploymentIdByScenario(
+      @Nonnull final String resourceGroup, @Nonnull final String scenarioId) {
+    return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
         .filter(deployment -> scenarioId.equals(deployment.getScenarioId()))
         .findFirst()
         .map(AiDeployment::getId);
@@ -138,7 +121,7 @@ private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenari
    * @param deployment The deployment.
    * @return true if the deployment is of the model.
    */
-  protected static boolean isDeploymentOfModel(
+  private static boolean isDeploymentOfModel(
       @Nonnull final String modelName, @Nonnull final AiDeployment deployment) {
     final var deploymentDetails = deployment.getDetails();
     // The AI Core specification doesn't mention that this is nullable, but it can be.

diff --git a/core/src/test/java/com/sap/ai/sdk/core/CacheTest.java b/core/src/test/java/com/sap/ai/sdk/core/CacheTest.java
@@ -5,9 +5,11 @@
 import static com.github.tomakehurst.wiremock.client.WireMock.get;
 import static com.github.tomakehurst.wiremock.client.WireMock.getRequestedFor;
 import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import com.sap.ai.sdk.core.client.WireMockTestServer;
-import org.apache.hc.core5.http.HttpStatus;
+import com.sap.cloud.sdk.cloudplatform.connectivity.DefaultHttpDestination;
+import java.util.NoSuchElementException;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
@@ -20,67 +22,24 @@ void setupCache() {
     wireMockServer.resetRequests();
   }
 
-  private static void stubGPT4() {
+  private static void stubGPT4(String resourceGroup) {
     wireMockServer.stubFor(
         get(urlPathEqualTo("/v2/lm/deployments"))
-            .withHeader("AI-Resource-Group", equalTo("default"))
+            .withHeader("AI-Resource-Group", equalTo(resourceGroup))
             .willReturn(
                 aResponse()
-                    .withStatus(HttpStatus.SC_OK)
-                    .withHeader("content-type", "application/json")
-                    .withBody(
-                        """
-                        {
-                          "count": 1,
-                          "resources": [
-                            {
-                              "configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
-                              "configurationName": "gpt-4-32k",
-                              "createdAt": "2024-04-17T15:19:53Z",
-                              "deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
-                              "details": {
-                                "resources": {
-                                  "backend_details": {
-                                    "model": {
-                                      "name": "gpt-4-32k",
-                                      "version": "latest"
-                                    }
-                                  }
-                                },
-                                "scaling": {
-                                  "backend_details": {}
-                                }
-                              },
-                              "id": "d19b998f347341aa",
-                              "lastOperation": "CREATE",
-                              "latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
-                              "modifiedAt": "2024-05-07T13:05:45Z",
-                              "scenarioId": "foundation-models",
-                              "startTime": "2024-04-17T15:21:15Z",
-                              "status": "RUNNING",
-                              "submissionTime": "2024-04-17T15:20:11Z",
-                              "targetStatus": "RUNNING"
-                            }
-                          ]
-                        }
-                        """)));
+                    .withBodyFile("GPT4DeploymentResponse.json")
+                    .withHeader("Content-Type", "application/json")));
   }
 
-  private static void stubEmpty() {
+  private static void stubEmpty(String resourceGroup) {
     wireMockServer.stubFor(
         get(urlPathEqualTo("/v2/lm/deployments"))
-            .withHeader("AI-Resource-Group", equalTo("default"))
+            .withHeader("AI-Resource-Group", equalTo(resourceGroup))
             .willReturn(
                 aResponse()
-                    .withStatus(HttpStatus.SC_OK)
-                    .withHeader("content-type", "application/json")
-                    .withBody(
-                        """
-                        {
-                          "count": 0,
-                          "resources": []
-                        }
-                        """)));
+                    .withBodyFile("emptyDeploymentResponse.json")
+                    .withHeader("content-type", "application/json")));
   }
 
   /**
@@ -94,31 +53,16 @@ private static void stubEmpty() {
    */
   @Test
   void newDeployment() {
-    stubGPT4();
-    cacheUnderTest.loadCache(client, "default");
+    String resourceGroup = "default";
+    stubGPT4(resourceGroup);
 
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
+    cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
     wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
 
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
+    cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
     wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
   }
 
-  @Test
-  void clearCache() {
-    stubGPT4();
-    cacheUnderTest.loadCache(client, "default");
-
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
-    wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
-
-    cacheUnderTest.clearCache();
-
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
-    // the deployment is not in the cache anymore, so we need to query it again
-    wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
-  }
-
   /**
    * The user creates a deployment after starting with an empty cache.
    *
@@ -130,15 +74,57 @@ void clearCache() {
    */
   @Test
   void newDeploymentAfterReset() {
-    stubEmpty();
-    cacheUnderTest.loadCache(client, "default");
-    stubGPT4();
+    String resourceGroup = "default";
+    stubEmpty(resourceGroup);
+    cacheUnderTest.resetCache(client, resourceGroup);
+    stubGPT4(resourceGroup);
 
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
+    cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
     // 1 reset empty and 1 cache miss
     wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
 
-    cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
+    cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
+    wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
+  }
+
+  @Test
+  void resourceGroupIsolation() {
+    String resourceGroupA = "A";
+    String resourceGroupB = "B";
+    stubGPT4(resourceGroupA);
+    stubGPT4(resourceGroupB);
+
+    cacheUnderTest.getDeploymentIdByModel(client, resourceGroupA, "gpt-4-32k");
+    wireMockServer.verify(
+        1,
+        getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
+            .withHeader("AI-Resource-Group", equalTo(resourceGroupA)));
+    wireMockServer.verify(
+        0,
+        getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
+            .withHeader("AI-Resource-Group", equalTo(resourceGroupB)));
+  }
+
+  @Test
+  void exceptionDeploymentNotFound() {
+    String resourceGroup = "default";
+    stubEmpty(resourceGroup);
+
+    assertThatThrownBy(
+            () -> cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k"))
+        .isExactlyInstanceOf(NoSuchElementException.class)
+        .hasMessageContaining("No running deployment found for model: gpt-4-32k");
+  }
+
+  @Test
+  void resetCache() {
+    String resourceGroup = "default";
+    stubGPT4(resourceGroup);
+    cacheUnderTest.resetCache(client, resourceGroup);
+    wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
+
+    final var destination = DefaultHttpDestination.builder(wireMockServer.baseUrl()).build();
+    new AiCoreService().withDestination(destination).reloadCachedDeployments(resourceGroup);
     wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
   }
 }
diff --git a/core/src/test/resources/__files/GPT4DeploymentResponse.json b/core/src/test/resources/__files/GPT4DeploymentResponse.json
@@ -0,0 +1,33 @@
+{
+  "count": 1,
+  "resources": [
+    {
+      "configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
+      "configurationName": "gpt-4-32k",
+      "createdAt": "2024-04-17T15:19:53Z",
+      "deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
+      "details": {
+        "resources": {
+          "backend_details": {
+            "model": {
+              "name": "gpt-4-32k",
+              "version": "latest"
+            }
+          }
+        },
+        "scaling": {
+          "backend_details": {}
+        }
+      },
+      "id": "d19b998f347341aa",
+      "lastOperation": "CREATE",
+      "latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
+      "modifiedAt": "2024-05-07T13:05:45Z",
+      "scenarioId": "foundation-models",
+      "startTime": "2024-04-17T15:21:15Z",
+      "status": "RUNNING",
+      "submissionTime": "2024-04-17T15:20:11Z",
+      "targetStatus": "RUNNING"
+    }
+  ]
+}
diff --git a/core/src/test/resources/__files/emptyDeploymentResponse.json b/core/src/test/resources/__files/emptyDeploymentResponse.json
@@ -0,0 +1,4 @@
+{
+  "count": 0,
+  "resources": []
+}