Skip to content

Commit

Permalink
Added resource group isolation to DeploymentCache (#103)
Browse files Browse the repository at this point in the history
* Added resource group to DeploymentCache

* PMD

* Matthias' suggestions

---------

Co-authored-by: Matthias Kuhr <[email protected]>
  • Loading branch information
CharlesDuboisSAP and MatKuhr authored Oct 18, 2024
1 parent a54ca59 commit 759fff0
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 113 deletions.
11 changes: 11 additions & 0 deletions core/src/main/java/com/sap/ai/sdk/core/AiCoreService.java
Original file line number Diff line number Diff line change
Expand Up @@ -210,4 +210,15 @@ protected ApiClient getApiClient(@Nonnull final Destination destination) {

return new ApiClient(rt).setBasePath(destination.asHttp().getUri().toString());
}

/**
* Remove all entries from the cache then load all deployments into the cache.
*
* <p><b>Call this whenever a deployment is deleted.</b>
*
* @param resourceGroup the resource group of the deleted deployment, usually "default".
*/
public void reloadCachedDeployments(@Nonnull final String resourceGroup) {
DEPLOYMENT_CACHE.resetCache(client(), resourceGroup);
}
}
57 changes: 20 additions & 37 deletions core/src/main/java/com/sap/ai/sdk/core/DeploymentCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;

Expand All @@ -20,7 +21,7 @@
class DeploymentCache {

/** Cache for deployment ids. The key is the model name and the value is the deployment id. */
protected final Set<AiDeployment> CACHE = new HashSet<>();
private final Map<String, Set<AiDeployment>> CACHE = new ConcurrentHashMap<>();

/**
* Remove all entries from the cache then load all deployments into the cache.
Expand All @@ -30,32 +31,12 @@ class DeploymentCache {
* @param client the API client to query deployments.
* @param resourceGroup the resource group, usually "default".
*/
public void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
clearCache();
loadCache(client, resourceGroup);
}

/**
* Remove all entries from the cache.
*
* <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
*/
protected void clearCache() {
CACHE.clear();
}

/**
* Load all deployments into the cache
*
* <p><b>Call {@link #resetCache} whenever a deployment is deleted.</b>
*
* @param client the API client to query deployments.
* @param resourceGroup the resource group, usually "default".
*/
protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
void resetCache(@Nonnull final ApiClient client, @Nonnull final String resourceGroup) {
CACHE.remove(resourceGroup);
try {
final var deployments = new DeploymentApi(client).query(resourceGroup).getResources();
CACHE.addAll(deployments);
final var deployments =
new HashSet<>(new DeploymentApi(client).query(resourceGroup).getResources());
CACHE.put(resourceGroup, deployments);
} catch (final OpenApiRequestException e) {
log.error("Failed to load deployments into cache", e);
}
Expand All @@ -72,25 +53,26 @@ protected void loadCache(@Nonnull final ApiClient client, @Nonnull final String
* @throws NoSuchElementException if no running deployment is found for the model.
*/
@Nonnull
public String getDeploymentIdByModel(
String getDeploymentIdByModel(
@Nonnull final ApiClient client,
@Nonnull final String resourceGroup,
@Nonnull final String modelName)
throws NoSuchElementException {
return getDeploymentIdByModel(modelName)
return getDeploymentIdByModel(resourceGroup, modelName)
.orElseGet(
() -> {
resetCache(client, resourceGroup);
return getDeploymentIdByModel(modelName)
return getDeploymentIdByModel(resourceGroup, modelName)
.orElseThrow(
() ->
new NoSuchElementException(
"No running deployment found for model: " + modelName));
});
}

private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName) {
return CACHE.stream()
private Optional<String> getDeploymentIdByModel(
@Nonnull final String resourceGroup, @Nonnull final String modelName) {
return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
.filter(deployment -> isDeploymentOfModel(modelName, deployment))
.findFirst()
.map(AiDeployment::getId);
Expand All @@ -107,25 +89,26 @@ private Optional<String> getDeploymentIdByModel(@Nonnull final String modelName)
* @throws NoSuchElementException if no running deployment is found for the scenario.
*/
@Nonnull
public String getDeploymentIdByScenario(
String getDeploymentIdByScenario(
@Nonnull final ApiClient client,
@Nonnull final String resourceGroup,
@Nonnull final String scenarioId)
throws NoSuchElementException {
return getDeploymentIdByScenario(scenarioId)
return getDeploymentIdByScenario(resourceGroup, scenarioId)
.orElseGet(
() -> {
resetCache(client, resourceGroup);
return getDeploymentIdByScenario(scenarioId)
return getDeploymentIdByScenario(resourceGroup, scenarioId)
.orElseThrow(
() ->
new NoSuchElementException(
"No running deployment found for scenario: " + scenarioId));
});
}

private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenarioId) {
return CACHE.stream()
private Optional<String> getDeploymentIdByScenario(
@Nonnull final String resourceGroup, @Nonnull final String scenarioId) {
return CACHE.getOrDefault(resourceGroup, new HashSet<>()).stream()
.filter(deployment -> scenarioId.equals(deployment.getScenarioId()))
.findFirst()
.map(AiDeployment::getId);
Expand All @@ -138,7 +121,7 @@ private Optional<String> getDeploymentIdByScenario(@Nonnull final String scenari
* @param deployment The deployment.
* @return true if the deployment is of the model.
*/
protected static boolean isDeploymentOfModel(
private static boolean isDeploymentOfModel(
@Nonnull final String modelName, @Nonnull final AiDeployment deployment) {
final var deploymentDetails = deployment.getDetails();
// The AI Core specification doesn't mention that this is nullable, but it can be.
Expand Down
138 changes: 62 additions & 76 deletions core/src/test/java/com/sap/ai/sdk/core/CacheTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import static com.github.tomakehurst.wiremock.client.WireMock.get;
import static com.github.tomakehurst.wiremock.client.WireMock.getRequestedFor;
import static com.github.tomakehurst.wiremock.client.WireMock.urlPathEqualTo;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.sap.ai.sdk.core.client.WireMockTestServer;
import org.apache.hc.core5.http.HttpStatus;
import com.sap.cloud.sdk.cloudplatform.connectivity.DefaultHttpDestination;
import java.util.NoSuchElementException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

Expand All @@ -20,67 +22,24 @@ void setupCache() {
wireMockServer.resetRequests();
}

private static void stubGPT4() {
private static void stubGPT4(String resourceGroup) {
wireMockServer.stubFor(
get(urlPathEqualTo("/v2/lm/deployments"))
.withHeader("AI-Resource-Group", equalTo("default"))
.withHeader("AI-Resource-Group", equalTo(resourceGroup))
.willReturn(
aResponse()
.withStatus(HttpStatus.SC_OK)
.withHeader("content-type", "application/json")
.withBody(
"""
{
"count": 1,
"resources": [
{
"configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
"configurationName": "gpt-4-32k",
"createdAt": "2024-04-17T15:19:53Z",
"deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
"details": {
"resources": {
"backend_details": {
"model": {
"name": "gpt-4-32k",
"version": "latest"
}
}
},
"scaling": {
"backend_details": {}
}
},
"id": "d19b998f347341aa",
"lastOperation": "CREATE",
"latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
"modifiedAt": "2024-05-07T13:05:45Z",
"scenarioId": "foundation-models",
"startTime": "2024-04-17T15:21:15Z",
"status": "RUNNING",
"submissionTime": "2024-04-17T15:20:11Z",
"targetStatus": "RUNNING"
}
]
}
""")));
.withBodyFile("GPT4DeploymentResponse.json")
.withHeader("Content-Type", "application/json")));
}

private static void stubEmpty() {
private static void stubEmpty(String resourceGroup) {
wireMockServer.stubFor(
get(urlPathEqualTo("/v2/lm/deployments"))
.withHeader("AI-Resource-Group", equalTo("default"))
.withHeader("AI-Resource-Group", equalTo(resourceGroup))
.willReturn(
aResponse()
.withStatus(HttpStatus.SC_OK)
.withHeader("content-type", "application/json")
.withBody(
"""
{
"count": 0,
"resources": []
}
""")));
.withBodyFile("emptyDeploymentResponse.json")
.withHeader("content-type", "application/json")));
}

/**
Expand All @@ -94,31 +53,16 @@ private static void stubEmpty() {
*/
@Test
void newDeployment() {
stubGPT4();
cacheUnderTest.loadCache(client, "default");
String resourceGroup = "default";
stubGPT4(resourceGroup);

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
}

@Test
void clearCache() {
stubGPT4();
cacheUnderTest.loadCache(client, "default");

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));

cacheUnderTest.clearCache();

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
// the deployment is not in the cache anymore, so we need to query it again
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
}

/**
* The user creates a deployment after starting with an empty cache.
*
Expand All @@ -130,15 +74,57 @@ void clearCache() {
*/
@Test
void newDeploymentAfterReset() {
stubEmpty();
cacheUnderTest.loadCache(client, "default");
stubGPT4();
String resourceGroup = "default";
stubEmpty(resourceGroup);
cacheUnderTest.resetCache(client, resourceGroup);
stubGPT4(resourceGroup);

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
// 1 reset empty and 1 cache miss
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));

cacheUnderTest.getDeploymentIdByModel(client, "default", "gpt-4-32k");
cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k");
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
}

@Test
void resourceGroupIsolation() {
String resourceGroupA = "A";
String resourceGroupB = "B";
stubGPT4(resourceGroupA);
stubGPT4(resourceGroupB);

cacheUnderTest.getDeploymentIdByModel(client, resourceGroupA, "gpt-4-32k");
wireMockServer.verify(
1,
getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
.withHeader("AI-Resource-Group", equalTo(resourceGroupA)));
wireMockServer.verify(
0,
getRequestedFor(urlPathEqualTo("/v2/lm/deployments"))
.withHeader("AI-Resource-Group", equalTo(resourceGroupB)));
}

@Test
void exceptionDeploymentNotFound() {
String resourceGroup = "default";
stubEmpty(resourceGroup);

assertThatThrownBy(
() -> cacheUnderTest.getDeploymentIdByModel(client, resourceGroup, "gpt-4-32k"))
.isExactlyInstanceOf(NoSuchElementException.class)
.hasMessageContaining("No running deployment found for model: gpt-4-32k");
}

@Test
void resetCache() {
String resourceGroup = "default";
stubGPT4(resourceGroup);
cacheUnderTest.resetCache(client, resourceGroup);
wireMockServer.verify(1, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));

final var destination = DefaultHttpDestination.builder(wireMockServer.baseUrl()).build();
new AiCoreService().withDestination(destination).reloadCachedDeployments(resourceGroup);
wireMockServer.verify(2, getRequestedFor(urlPathEqualTo("/v2/lm/deployments")));
}
}
33 changes: 33 additions & 0 deletions core/src/test/resources/__files/GPT4DeploymentResponse.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"count": 1,
"resources": [
{
"configurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
"configurationName": "gpt-4-32k",
"createdAt": "2024-04-17T15:19:53Z",
"deploymentUrl": "https://api.ai.intprod-eu12.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d19b998f347341aa",
"details": {
"resources": {
"backend_details": {
"model": {
"name": "gpt-4-32k",
"version": "latest"
}
}
},
"scaling": {
"backend_details": {}
}
},
"id": "d19b998f347341aa",
"lastOperation": "CREATE",
"latestRunningConfigurationId": "7652a231-ba9b-4fcc-b473-2c355cb21b61",
"modifiedAt": "2024-05-07T13:05:45Z",
"scenarioId": "foundation-models",
"startTime": "2024-04-17T15:21:15Z",
"status": "RUNNING",
"submissionTime": "2024-04-17T15:20:11Z",
"targetStatus": "RUNNING"
}
]
}
4 changes: 4 additions & 0 deletions core/src/test/resources/__files/emptyDeploymentResponse.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"count": 0,
"resources": []
}

0 comments on commit 759fff0

Please sign in to comment.