Skip to content

Commit

Permalink
chore: Add cache for deployment ID (#96)
Browse files Browse the repository at this point in the history
* chore: add cache for deployment ID

* fix

* correct merge

* fix backend type

* fix tests

* switch to id + model

* remove todo

* fix lint issue

* add default value

* fix typos

* add workaround

* cache all deployments

* fix reverse

* fix api
  • Loading branch information
marikaner authored Sep 2, 2024
1 parent 0f94413 commit 88e94ca
Show file tree
Hide file tree
Showing 8 changed files with 347 additions and 81 deletions.
28 changes: 23 additions & 5 deletions eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,39 @@ import flatConfig from '@sap-cloud-sdk/eslint-config/flat-config.js';
export default [
...flatConfig,
{
// Estlint flat config is not supported by eslint-plugin-import.
// Eslint flat config is not supported by eslint-plugin-import.
// https://github.com/import-js/eslint-plugin-import/issues/2556
rules: { 'import/namespace': 'off'}
rules: {
'import/namespace': 'off',
'import/no-internal-modules': 'off'
// TODO: add this once there is a new release of eslint-plugin-import
// 'import/no-internal-modules': [
// 'error',
// { allow: ['@sap-cloud-sdk/*/internal.js'] }
// ]
}
},
{
ignores: ['**/*.d.ts', '**/dist/**/*', '**/coverage/**/*', 'packages/ai-core/src/client/**/*'],
ignores: [
'**/*.d.ts',
'**/dist/**/*',
'**/coverage/**/*',
'packages/ai-core/src/client/**/*'
]
},
{
files: ['**/test-util/**/*.ts', '**/packages/gen-ai-hub/src/orchestration/client/**/*'],
files: [
'**/test-util/**/*.ts',
'**/packages/gen-ai-hub/src/orchestration/client/**/*'
],
rules: {
'jsdoc/require-jsdoc': 'off'
}
},
{
files: ['**/packages/gen-ai-hub/src/orchestration/client/api/default-api.ts'],
files: [
'**/packages/gen-ai-hub/src/orchestration/client/api/default-api.ts'
],
rules: {
'@typescript-eslint/explicit-module-boundary-types': 'off'
}
Expand Down
7 changes: 1 addition & 6 deletions packages/gen-ai-hub/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,9 @@ export { OpenAiClient } from './client/index.js';
export type {
ModelDeployment,
DeploymentIdConfiguration,
FoundationModel,
ModelConfiguration
} from './utils/index.js';
export {
isDeploymentIdConfiguration,
getDeploymentId,
resolveDeployment
} from './utils/index.js';
export { isDeploymentIdConfiguration, getDeploymentId } from './utils/index.js';

export type {
OrchestrationCompletionParameters,
Expand Down
26 changes: 12 additions & 14 deletions packages/gen-ai-hub/src/orchestration/orchestration-client.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { executeRequest, CustomRequestConfig } from '@sap-ai-sdk/core';
import { pickValueIgnoreCase } from '@sap-cloud-sdk/util';
import { resolveDeployment } from '../utils/deployment-resolver.js';
import { resolveDeploymentId } from '../utils/deployment-resolver.js';
import {
CompletionPostRequest,
CompletionPostResponse
Expand All @@ -26,19 +26,17 @@ export class OrchestrationClient {
const body = constructCompletionPostRequest(data);
deploymentId =
deploymentId ??
(
await resolveDeployment({
scenarioId: 'orchestration',
model: {
name: data.llmConfig.model_name,
version: data.llmConfig.model_version
},
resourceGroup: pickValueIgnoreCase(
requestConfig?.headers,
'ai-resource-group'
)
})
).id;
(await resolveDeploymentId({
scenarioId: 'orchestration',
model: {
name: data.llmConfig.model_name,
version: data.llmConfig.model_version
},
resourceGroup: pickValueIgnoreCase(
requestConfig?.headers,
'ai-resource-group'
)
}));

const response = await executeRequest(
{
Expand Down
127 changes: 127 additions & 0 deletions packages/gen-ai-hub/src/utils/deployment-cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import { type AiDeployment } from '@sap-ai-sdk/ai-core';
import { deploymentCache } from './deployment-cache.js';
import { FoundationModel } from './model.js';

describe('deployment cache', () => {
  // `deploymentCache` is a shared module-level instance, so it is emptied after every test.
  afterEach(() => {
    deploymentCache.clear();
  });

  it('should cache the deployment', () => {
    const model = { name: 'gpt-4o', version: 'latest' };
    const resolutionOptions = {
      scenarioId: 'foundation-models',
      executableId: 'execution-id',
      model
    };

    deploymentCache.set(
      resolutionOptions,
      mockAiDeployment('deployment-id', { ...model })
    );

    const cached = deploymentCache.get(resolutionOptions);
    expect(cached).toEqual({
      id: 'deployment-id',
      model: { name: 'gpt-4o', version: 'latest' }
    });
  });

  it('should cache all deployments independent of potentially given models', () => {
    const requested = {
      scenarioId: 'foundation-models',
      model: { name: 'gpt-4o', version: 'latest' }
    };
    const turboLatest = { name: 'gpt-35-turbo', version: 'latest' };
    const turboPinned = { name: 'gpt-35-turbo', version: '123' };

    deploymentCache.setAll(requested, [
      mockAiDeployment('deployment-id1', turboLatest),
      mockAiDeployment('deployment-id2', turboPinned)
    ]);

    // The requested model was not part of the deployments, so there is no entry for it.
    expect(deploymentCache.get(requested)).toBeUndefined();

    const cachedLatest = deploymentCache.get({
      ...requested,
      model: turboLatest
    });
    expect(cachedLatest?.id).toEqual('deployment-id1');

    const cachedPinned = deploymentCache.get({
      ...requested,
      model: turboPinned
    });
    expect(cachedPinned?.id).toEqual('deployment-id2');
  });

  it('should cache only the first deployments for equal models and versions', () => {
    const scenario = { scenarioId: 'foundation-models' };
    const model = { name: 'gpt-4o', version: 'latest' };

    deploymentCache.setAll(scenario, [
      mockAiDeployment('deployment-id1', { ...model }),
      mockAiDeployment('deployment-id2', { ...model })
    ]);

    const cached = deploymentCache.get({ ...scenario, model });
    expect(cached?.id).toEqual('deployment-id1');
  });

  it('should cache only the first deployments for equal models and no versions', () => {
    const scenario = { scenarioId: 'foundation-models' };
    const model = { name: 'gpt-4o' };

    deploymentCache.setAll(scenario, [
      mockAiDeployment('deployment-id1', { ...model }),
      mockAiDeployment('deployment-id2', { ...model })
    ]);

    const cached = deploymentCache.get({ ...scenario, model });
    expect(cached?.id).toEqual('deployment-id1');
  });

  it('should ignore model versions without model name', () => {
    const scenario = { scenarioId: 'foundation-models' };
    const withoutModel = mockAiDeployment('deployment-id1');
    const versionOnly = mockAiDeployment('deployment-id2', {
      version: 'latest'
    });

    deploymentCache.setAll(scenario, [withoutModel, versionOnly]);

    const cached = deploymentCache.get(scenario);
    expect(cached?.id).toEqual('deployment-id1');
  });
});

/**
 * Build a minimal AI deployment stub for the cache tests.
 * @param id - ID to assign to the stubbed deployment.
 * @param model - Optional (partial) model placed under `details.resources.backendDetails.model`.
 * When omitted, `backendDetails` is left undefined.
 * @returns An object holding only `id` and `details`, cast to `AiDeployment`.
 */
function mockAiDeployment(id: string, model?: Partial<FoundationModel>) {
  // A falsy `model` is passed through unchanged, otherwise it is wrapped as `{ model }`.
  const backendDetails = model && { model };
  const stub = {
    id,
    details: {
      resources: { backendDetails }
    }
  };
  return stub as unknown as AiDeployment;
}
81 changes: 81 additions & 0 deletions packages/gen-ai-hub/src/utils/deployment-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import { Cache } from '@sap-cloud-sdk/connectivity/internal.js';
import { type AiDeployment } from '@sap-ai-sdk/ai-core';
import { type DeploymentResolutionOptions } from './deployment-resolver.js';
import { extractModel, type FoundationModel } from './model.js';

/**
 * Build the cache key for the given deployment resolution options.
 *
 * The key is made of the scenario ID, executable ID, model name, model version and resource group.
 * A missing executable ID, model name or model version is represented by an empty string.
 * A missing resource group falls back to `'default'`.
 * @param options - Deployment resolution options to derive the key from.
 * @returns The cache key.
 */
function getCacheKey({
  scenarioId,
  executableId = '',
  model,
  resourceGroup = 'default'
}: DeploymentResolutionOptions): string {
  // Serialize the parts as a JSON array instead of joining them with `-`.
  // The parts regularly contain `-` themselves (e.g. 'foundation-models', 'gpt-4o'),
  // so a joined key would be ambiguous and different options could share one entry.
  return JSON.stringify([
    scenarioId,
    executableId,
    model?.name ?? '',
    model?.version ?? '',
    resourceGroup
  ]);
}

/**
 * Reduced representation of an AI deployment, as stored in the deployment cache.
 */
interface Deployment {
  /**
   * ID of the deployment, taken from the `id` of the AI deployment.
   */
  id: string;
  /**
   * Model of the deployment as returned by `extractModel()`; may be absent.
   */
  model?: FoundationModel;
}

/**
 * Wrap a plain cache in an API that is keyed by deployment resolution options.
 * @param cache - Underlying cache that holds the deployment entries.
 * @returns An object with `get`, `set`, `setAll` and `clear` functions operating on the given cache.
 * @internal
 */
function createDeploymentCache(cache: Cache<Deployment>) {
  // Single write path shared by `set` and `setAll`: derive the key, then store the reduced deployment.
  const store = (
    keyOptions: DeploymentResolutionOptions,
    deployment: AiDeployment
  ): void => {
    const key = getCacheKey(keyOptions);
    cache.set(key, { entry: transformDeploymentForCache(deployment) });
  };

  return {
    /**
     * Look up a cached deployment.
     * @param opts - Deployment resolution options identifying the cache entry.
     * @returns The cached deployment, or undefined if there is no entry for the options.
     */
    get: (opts: DeploymentResolutionOptions): Deployment | undefined => {
      const key = getCacheKey(opts);
      return cache.get(key);
    },
    /**
     * Cache a single deployment under the given options.
     * @param opts - Deployment resolution options identifying the cache entry.
     * @param deployment - AI deployment to cache.
     */
    set: (
      opts: DeploymentResolutionOptions,
      deployment: AiDeployment
    ): void => {
      store(opts, deployment);
    },
    /**
     * Cache several deployments at once. Each deployment is stored under the model extracted from itself.
     * @param opts - Deployment resolution options shared by all entries. Model information in the options is ignored.
     * @param deployments - AI deployments to take the IDs and models from.
     */
    setAll: (
      opts: Omit<DeploymentResolutionOptions, 'model'>,
      deployments: AiDeployment[]
    ): void => {
      // Iterate over a reversed copy: later writes overwrite earlier ones,
      // so the first deployment of each model is the one that stays cached.
      for (const deployment of [...deployments].reverse()) {
        store({ ...opts, model: extractModel(deployment) }, deployment);
      }
    },
    /**
     * Clear the underlying cache.
     */
    clear: () => cache.clear()
  };
}

/**
 * Reduce an AI deployment to the data that is kept in the cache.
 * @param deployment - AI deployment to transform.
 * @returns The deployment ID together with the model extracted from the deployment.
 */
function transformDeploymentForCache(deployment: AiDeployment): Deployment {
  const { id } = deployment;
  const model = extractModel(deployment);
  return { id, model };
}

// 5 minutes
const deploymentCacheValidityTime = 5 * 60 * 1000;

/**
 * Shared cache instance for resolved deployments.
 * @internal
 */
export const deploymentCache = createDeploymentCache(
  new Cache(deploymentCacheValidityTime)
);
Loading

0 comments on commit 88e94ca

Please sign in to comment.