generated from SAP/repository-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: Add cache for deployment ID (#96)
* chore: add cache for deployment ID * fix * correct merge * fix backend type * fix tests * switch to id + model * remove todo * fix lint issue * add default value * fix typos * add workaround * cache all deployments * fix reverse * fix api
- Loading branch information
Showing
8 changed files
with
347 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import { type AiDeployment } from '@sap-ai-sdk/ai-core'; | ||
import { deploymentCache } from './deployment-cache.js'; | ||
import { FoundationModel } from './model.js'; | ||
|
||
// Unit tests for the deployment cache: single entries, bulk caching via
// `setAll`, and the "first deployment wins" rule for duplicate models.
describe('deployment cache', () => {
  const scenarioId = 'foundation-models';

  // Every test starts from an empty cache.
  afterEach(() => {
    deploymentCache.clear();
  });

  it('should cache the deployment', () => {
    const resolutionOptions = {
      scenarioId,
      executableId: 'execution-id',
      model: { name: 'gpt-4o', version: 'latest' }
    };
    const deployment = mockAiDeployment('deployment-id', {
      name: 'gpt-4o',
      version: 'latest'
    });

    deploymentCache.set(resolutionOptions, deployment);

    const cached = deploymentCache.get(resolutionOptions);
    expect(cached).toEqual({
      id: 'deployment-id',
      model: { name: 'gpt-4o', version: 'latest' }
    });
  });

  it('should cache all deployments independent of potentially given models', () => {
    // The model given here must be ignored by `setAll`.
    const resolutionOptions = {
      scenarioId,
      model: { name: 'gpt-4o', version: 'latest' }
    };
    const turboLatest = { name: 'gpt-35-turbo', version: 'latest' };
    const turbo123 = { name: 'gpt-35-turbo', version: '123' };

    deploymentCache.setAll(resolutionOptions, [
      mockAiDeployment('deployment-id1', turboLatest),
      mockAiDeployment('deployment-id2', turbo123)
    ]);

    expect(deploymentCache.get(resolutionOptions)).toBeUndefined();

    const cachedLatest = deploymentCache.get({
      ...resolutionOptions,
      model: { name: 'gpt-35-turbo', version: 'latest' }
    });
    expect(cachedLatest?.id).toEqual('deployment-id1');

    const cached123 = deploymentCache.get({
      ...resolutionOptions,
      model: { name: 'gpt-35-turbo', version: '123' }
    });
    expect(cached123?.id).toEqual('deployment-id2');
  });

  it('should cache only the first deployments for equal models and versions', () => {
    const resolutionOptions = { scenarioId };
    const deployments = ['deployment-id1', 'deployment-id2'].map(id =>
      mockAiDeployment(id, { name: 'gpt-4o', version: 'latest' })
    );

    deploymentCache.setAll(resolutionOptions, deployments);

    const cached = deploymentCache.get({
      ...resolutionOptions,
      model: { name: 'gpt-4o', version: 'latest' }
    });
    expect(cached?.id).toEqual('deployment-id1');
  });

  it('should cache only the first deployments for equal models and no versions', () => {
    const resolutionOptions = { scenarioId };
    const deployments = ['deployment-id1', 'deployment-id2'].map(id =>
      mockAiDeployment(id, { name: 'gpt-4o' })
    );

    deploymentCache.setAll(resolutionOptions, deployments);

    const cached = deploymentCache.get({
      ...resolutionOptions,
      model: { name: 'gpt-4o' }
    });
    expect(cached?.id).toEqual('deployment-id1');
  });

  it('should ignore model versions without model name', () => {
    const resolutionOptions = { scenarioId };

    deploymentCache.setAll(resolutionOptions, [
      mockAiDeployment('deployment-id1'),
      mockAiDeployment('deployment-id2', { version: 'latest' })
    ]);

    const cached = deploymentCache.get(resolutionOptions);
    expect(cached?.id).toEqual('deployment-id1');
  });
});
|
||
/**
 * Build a minimal stand-in for an AI deployment for use in tests.
 * Only `id` and `details.resources.backendDetails` are populated.
 * @param id - ID of the mocked deployment.
 * @param model - Optional (partial) model information; when omitted, `backendDetails` is left unset.
 * @returns The mocked object, cast to `AiDeployment`.
 */
function mockAiDeployment(id: string, model?: Partial<FoundationModel>) {
  // Only wrap the model when one was given; otherwise pass the falsy value through unchanged.
  const backendDetails = model && { model };
  const resources = { backendDetails };
  return { id, details: { resources } } as unknown as AiDeployment;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import { Cache } from '@sap-cloud-sdk/connectivity/internal.js'; | ||
import { type AiDeployment } from '@sap-ai-sdk/ai-core'; | ||
import { type DeploymentResolutionOptions } from './deployment-resolver.js'; | ||
import { extractModel, type FoundationModel } from './model.js'; | ||
|
||
/**
 * Build the cache key for the given deployment resolution options.
 *
 * The key is the JSON serialization of the tuple
 * `[scenarioId, executableId, model name, model version, resourceGroup]`.
 * A structured serialization is used instead of joining with `-`, because the
 * individual parts may contain `-` themselves (e.g. `foundation-models`,
 * `gpt-35-turbo`), which would let different option sets map to the same key.
 * @param opts - Deployment resolution options. A missing `executableId`, model name or model version is treated as an empty string; a missing `resourceGroup` is treated as `'default'`.
 * @returns The cache key.
 */
function getCacheKey({
  scenarioId,
  executableId = '',
  model,
  resourceGroup = 'default'
}: DeploymentResolutionOptions): string {
  return JSON.stringify([
    scenarioId,
    executableId,
    model?.name ?? '',
    model?.version ?? '',
    resourceGroup
  ]);
}
|
||
/**
 * Reduced representation of a deployment as it is stored in the cache.
 */
type Deployment = {
  /** ID of the deployment. */
  id: string;
  /** Model associated with the deployment, if any was extracted. */
  model?: FoundationModel;
};
|
||
/**
 * Wrap a plain cache with accessors that are keyed by deployment resolution options.
 * @param cache - Pure cache object that stores the reduced deployment entries.
 * @returns The deployment cache, exposing `get`, `set`, `setAll` and `clear`.
 * @internal
 */
function createDeploymentCache(cache: Cache<Deployment>) {
  // Shared write path: derive the key from the options and store the reduced deployment.
  const store = (
    opts: DeploymentResolutionOptions,
    deployment: AiDeployment
  ): void => {
    cache.set(getCacheKey(opts), {
      entry: transformDeploymentForCache(deployment)
    });
  };

  return {
    /**
     * Look up a deployment in the cache.
     * @param opts - Deployment resolution options to look up the cached deployment for.
     * @returns The cached deployment, or undefined if there is none.
     */
    get: (opts: DeploymentResolutionOptions): Deployment | undefined =>
      cache.get(getCacheKey(opts)),
    /**
     * Put a single deployment into the cache.
     * @param opts - Deployment resolution options the deployment is stored under.
     * @param deployment - Deployment to cache.
     */
    set: (
      opts: DeploymentResolutionOptions,
      deployment: AiDeployment
    ): void => {
      store(opts, deployment);
    },
    /**
     * Put multiple deployments into the cache, each keyed by the model extracted from the respective AI deployment.
     * @param opts - Deployment resolution options the deployments are stored under. Model information in the options is ignored.
     * @param deployments - Deployments to take the IDs and models from.
     */
    setAll: (
      opts: Omit<DeploymentResolutionOptions, 'model'>,
      deployments: AiDeployment[]
    ): void => {
      // Walk from last to first: later writes overwrite earlier ones, so the
      // first deployment of each model is the one that stays cached.
      for (let index = deployments.length - 1; index >= 0; index--) {
        const deployment = deployments[index];
        store({ ...opts, model: extractModel(deployment) }, deployment);
      }
    },
    /**
     * Remove all entries from the underlying cache.
     */
    clear: () => cache.clear()
  };
}
|
||
/**
 * Reduce an AI deployment to the data that is kept in the cache.
 * @param deployment - Deployment to reduce.
 * @returns The deployment ID together with the model extracted from the deployment.
 */
function transformDeploymentForCache(deployment: AiDeployment): Deployment {
  const { id } = deployment;
  const model = extractModel(deployment);
  return { id, model };
}
|
||
// Value handed to the `Cache` constructor: 5 minutes, expressed in milliseconds.
const DEPLOYMENT_CACHE_VALIDITY_MS = 5 * 60 * 1000;

/**
 * Cache for deployments.
 * @internal
 */
export const deploymentCache = createDeploymentCache(
  new Cache(DEPLOYMENT_CACHE_VALIDITY_MS)
);
Oops, something went wrong.