chore: Add cache for deployment ID (#96)

* chore: add cache for deployment ID * fix * correct merge * fix backend type * fix tests * switch to id + model * remove todo * fix lint issue * add default value * fix typos * add workaround * cache all deployments * fix reverse * fix api
SAP · Sep 2, 2024 · 88e94ca · 88e94ca
1 parent 0f94413
commit 88e94ca
Show file tree

Hide file tree

Showing 8 changed files with 347 additions and 81 deletions.
diff --git a/eslint.config.js b/eslint.config.js
@@ -3,21 +3,39 @@ import flatConfig from '@sap-cloud-sdk/eslint-config/flat-config.js';
 export default [
   ...flatConfig,
   {
-    // Estlint flat config is not supported by eslint-plugin-import.
+    // Eslint flat config is not supported by eslint-plugin-import.
     // https://github.com/import-js/eslint-plugin-import/issues/2556
-    rules: { 'import/namespace': 'off'}
+    rules: {
+      'import/namespace': 'off',
+      'import/no-internal-modules': 'off'
+      // TODO: add this once there is a new release of eslint-plugin-import
+      // 'import/no-internal-modules': [
+      //   'error',
+      //   { allow: ['@sap-cloud-sdk/*/internal.js'] }
+      // ]
+    }
   },
   {
-    ignores: ['**/*.d.ts', '**/dist/**/*', '**/coverage/**/*', 'packages/ai-core/src/client/**/*'],
+    ignores: [
+      '**/*.d.ts',
+      '**/dist/**/*',
+      '**/coverage/**/*',
+      'packages/ai-core/src/client/**/*'
+    ]
   },
   {
-    files: ['**/test-util/**/*.ts', '**/packages/gen-ai-hub/src/orchestration/client/**/*'],
+    files: [
+      '**/test-util/**/*.ts',
+      '**/packages/gen-ai-hub/src/orchestration/client/**/*'
+    ],
     rules: {
       'jsdoc/require-jsdoc': 'off'
     }
   },
   {
-    files: ['**/packages/gen-ai-hub/src/orchestration/client/api/default-api.ts'],
+    files: [
+      '**/packages/gen-ai-hub/src/orchestration/client/api/default-api.ts'
+    ],
     rules: {
       '@typescript-eslint/explicit-module-boundary-types': 'off'
     }

diff --git a/packages/gen-ai-hub/src/index.ts b/packages/gen-ai-hub/src/index.ts
@@ -29,14 +29,9 @@ export { OpenAiClient } from './client/index.js';
 export type {
   ModelDeployment,
   DeploymentIdConfiguration,
-  FoundationModel,
   ModelConfiguration
 } from './utils/index.js';
-export {
-  isDeploymentIdConfiguration,
-  getDeploymentId,
-  resolveDeployment
-} from './utils/index.js';
+export { isDeploymentIdConfiguration, getDeploymentId } from './utils/index.js';
 
 export type {
   OrchestrationCompletionParameters,

diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-client.ts b/packages/gen-ai-hub/src/orchestration/orchestration-client.ts
@@ -1,6 +1,6 @@
 import { executeRequest, CustomRequestConfig } from '@sap-ai-sdk/core';
 import { pickValueIgnoreCase } from '@sap-cloud-sdk/util';
-import { resolveDeployment } from '../utils/deployment-resolver.js';
+import { resolveDeploymentId } from '../utils/deployment-resolver.js';
 import {
   CompletionPostRequest,
   CompletionPostResponse
@@ -26,19 +26,17 @@ export class OrchestrationClient {
     const body = constructCompletionPostRequest(data);
     deploymentId =
       deploymentId ??
-      (
-        await resolveDeployment({
-          scenarioId: 'orchestration',
-          model: {
-            name: data.llmConfig.model_name,
-            version: data.llmConfig.model_version
-          },
-          resourceGroup: pickValueIgnoreCase(
-            requestConfig?.headers,
-            'ai-resource-group'
-          )
-        })
-      ).id;
+      (await resolveDeploymentId({
+        scenarioId: 'orchestration',
+        model: {
+          name: data.llmConfig.model_name,
+          version: data.llmConfig.model_version
+        },
+        resourceGroup: pickValueIgnoreCase(
+          requestConfig?.headers,
+          'ai-resource-group'
+        )
+      }));
 
     const response = await executeRequest(
       {

diff --git a/packages/gen-ai-hub/src/utils/deployment-cache.test.ts b/packages/gen-ai-hub/src/utils/deployment-cache.test.ts
@@ -0,0 +1,127 @@
+import { type AiDeployment } from '@sap-ai-sdk/ai-core';
+import { deploymentCache } from './deployment-cache.js';
+import { FoundationModel } from './model.js';
+
+// Unit tests for the deployment cache: entries are stored and looked up by deployment resolution options.
+describe('deployment cache', () => {
+  // The imported cache is a single module-level object, so it is cleared after every test to keep tests independent.
+  afterEach(() => {
+    deploymentCache.clear();
+  });
+
+  // Storing a single deployment: only the ID and the model from the backend details are expected back, not the whole AI deployment.
+  it('should cache the deployment', () => {
+    const opts = {
+      scenarioId: 'foundation-models',
+      executableId: 'execution-id',
+      model: { name: 'gpt-4o', version: 'latest' }
+    };
+    deploymentCache.set(opts, {
+      id: 'deployment-id',
+      details: {
+        resources: {
+          backendDetails: { model: { name: 'gpt-4o', version: 'latest' } }
+        }
+      }
+    } as unknown as AiDeployment);
+
+    expect(deploymentCache.get(opts)).toEqual({
+      id: 'deployment-id',
+      model: { name: 'gpt-4o', version: 'latest' }
+    });
+  });
+
+  // The model in `opts` ('gpt-4o') matches none of the deployments handed to `setAll`, so looking it up yields nothing.
+  // Each deployment is expected to be retrievable under its own model instead.
+  it('should cache all deployments independent of potentially given models', () => {
+    const opts = {
+      scenarioId: 'foundation-models',
+      model: { name: 'gpt-4o', version: 'latest' }
+    };
+
+    deploymentCache.setAll(opts, [
+      mockAiDeployment('deployment-id1', {
+        name: 'gpt-35-turbo',
+        version: 'latest'
+      }),
+      mockAiDeployment('deployment-id2', {
+        name: 'gpt-35-turbo',
+        version: '123'
+      })
+    ]);
+
+    expect(deploymentCache.get(opts)).toBeUndefined();
+    expect(
+      deploymentCache.get({
+        ...opts,
+        model: {
+          name: 'gpt-35-turbo',
+          version: 'latest'
+        }
+      })?.id
+    ).toEqual('deployment-id1');
+    expect(
+      deploymentCache.get({
+        ...opts,
+        model: {
+          name: 'gpt-35-turbo',
+          version: '123'
+        }
+      })?.id
+    ).toEqual('deployment-id2');
+  });
+
+  // Two deployments with the same model name and version: the first one in the list is expected to be the cached one.
+  it('should cache only the first deployments for equal models and versions', () => {
+    const opts = {
+      scenarioId: 'foundation-models'
+    };
+
+    deploymentCache.setAll(opts, [
+      mockAiDeployment('deployment-id1', { name: 'gpt-4o', version: 'latest' }),
+      mockAiDeployment('deployment-id2', { name: 'gpt-4o', version: 'latest' })
+    ]);
+
+    expect(
+      deploymentCache.get({
+        ...opts,
+        model: { name: 'gpt-4o', version: 'latest' }
+      })?.id
+    ).toEqual('deployment-id1');
+  });
+
+  // Same as above, but the models carry a name only and no version.
+  it('should cache only the first deployments for equal models and no versions', () => {
+    const opts = {
+      scenarioId: 'foundation-models'
+    };
+
+    deploymentCache.setAll(opts, [
+      mockAiDeployment('deployment-id1', { name: 'gpt-4o' }),
+      mockAiDeployment('deployment-id2', { name: 'gpt-4o' })
+    ]);
+
+    expect(
+      deploymentCache.get({
+        ...opts,
+        model: { name: 'gpt-4o' }
+      })?.id
+    ).toEqual('deployment-id1');
+  });
+
+  // Neither deployment has a model name (the second one has a version only).
+  // Both are expected to end up under the model-less lookup, where the first one in the list wins.
+  it('should ignore model versions without model name', () => {
+    const opts = {
+      scenarioId: 'foundation-models'
+    };
+
+    deploymentCache.setAll(opts, [
+      mockAiDeployment('deployment-id1'),
+      mockAiDeployment('deployment-id2', { version: 'latest' })
+    ]);
+
+    expect(deploymentCache.get(opts)?.id).toEqual('deployment-id1');
+  });
+});
+
+/**
+ * Build a minimal AI deployment stub for the cache tests.
+ * @param id - ID of the mocked deployment.
+ * @param model - Optional (partial) model to put into the backend details. When omitted, the backend details stay undefined.
+ * @returns An object that only contains `id` and `details.resources.backendDetails`, cast to `AiDeployment`.
+ */
+function mockAiDeployment(id: string, model?: Partial<FoundationModel>) {
+  const resources = { backendDetails: model && { model } };
+  const stub = { id, details: { resources } };
+  // The stub is deliberately incomplete, hence the cast through `unknown`.
+  return stub as unknown as AiDeployment;
+}
diff --git a/packages/gen-ai-hub/src/utils/deployment-cache.ts b/packages/gen-ai-hub/src/utils/deployment-cache.ts
@@ -0,0 +1,81 @@
+import { Cache } from '@sap-cloud-sdk/connectivity/internal.js';
+import { type AiDeployment } from '@sap-ai-sdk/ai-core';
+import { type DeploymentResolutionOptions } from './deployment-resolver.js';
+import { extractModel, type FoundationModel } from './model.js';
+
+/**
+ * Build the cache key for the given deployment resolution options.
+ *
+ * The parts are serialized as a JSON array rather than joined with `-`.
+ * Values such as model names (e.g. `gpt-4o`) or scenario IDs (e.g. `foundation-models`) can contain `-` themselves,
+ * so a plain join could map different option sets to the same key.
+ * @param opts - Deployment resolution options to derive the key from. A missing executable ID, model name or model version is treated as an empty string, a missing resource group as `default`.
+ * @returns The cache key.
+ */
+function getCacheKey({
+  scenarioId,
+  executableId = '',
+  model,
+  resourceGroup = 'default'
+}: DeploymentResolutionOptions): string {
+  return JSON.stringify([
+    scenarioId,
+    executableId,
+    model?.name ?? '',
+    model?.version ?? '',
+    resourceGroup
+  ]);
+}
+
+/**
+ * Shape of a cached deployment: the deployment ID and, if available, its model.
+ */
+interface Deployment {
+  /** ID of the deployment. */
+  id: string;
+  /** Model of the deployment, if any. */
+  model?: FoundationModel;
+}
+
+/**
+ * Wrap a plain cache so that deployments can be stored and looked up by deployment resolution options.
+ * @param cache - Underlying cache object holding the deployment entries.
+ * @returns An object exposing `get`, `set`, `setAll` and `clear` on top of the given cache.
+ * @internal
+ */
+function createDeploymentCache(cache: Cache<Deployment>) {
+  /**
+   * Look up the deployment cached for the given options.
+   * @param opts - Deployment resolution options identifying the cached deployment.
+   * @returns The cached deployment, or undefined if there is none.
+   */
+  const get = (opts: DeploymentResolutionOptions): Deployment | undefined => {
+    const key = getCacheKey(opts);
+    return cache.get(key);
+  };
+
+  /**
+   * Cache a single deployment under the given options.
+   * @param opts - Deployment resolution options to cache the deployment for.
+   * @param deployment - AI deployment whose ID and model are cached.
+   */
+  const set = (
+    opts: DeploymentResolutionOptions,
+    deployment: AiDeployment
+  ): void => {
+    const key = getCacheKey(opts);
+    cache.set(key, { entry: transformDeploymentForCache(deployment) });
+  };
+
+  /**
+   * Cache several deployments at once, each under the model taken from the respective AI deployment.
+   * @param opts - Deployment resolution options shared by all deployments. Model information in these options is ignored.
+   * @param deployments - AI deployments whose IDs and models are cached.
+   */
+  const setAll = (
+    opts: Omit<DeploymentResolutionOptions, 'model'>,
+    deployments: AiDeployment[]
+  ): void => {
+    // Walk the list back to front: for deployments that map to the same key,
+    // the first one in the list is written last, so it is the one that remains cached.
+    for (const deployment of [...deployments].reverse()) {
+      const key = getCacheKey({ ...opts, model: extractModel(deployment) });
+      cache.set(key, { entry: transformDeploymentForCache(deployment) });
+    }
+  };
+
+  /**
+   * Remove all entries by clearing the underlying cache.
+   */
+  const clear = () => cache.clear();
+
+  return { get, set, setAll, clear };
+}
+
+/**
+ * Reduce an AI deployment to the data that is kept in the cache.
+ * @param deployment - AI deployment to transform.
+ * @returns The deployment ID together with the model obtained from `extractModel`.
+ */
+function transformDeploymentForCache(deployment: AiDeployment): Deployment {
+  const { id } = deployment;
+  const model = extractModel(deployment);
+  return { id, model };
+}
+
+/**
+ * Cache for deployments.
+ * Single module-level instance built on a `Cache` that is constructed with a value of 5 minutes
+ * (presumably the validity time of entries, given in milliseconds — confirm against the `Cache` API).
+ * @internal
+ */
+export const deploymentCache = createDeploymentCache(
+  new Cache(5 * 60 * 1000) // 5 minutes
+);