1+ import  {  AzureOpenAI  }  from  'openai' ; 
2+ import  {  Embedding ,  EmbeddingVector  }  from  './base-embedding' ; 
3+ 
/**
 * Connection settings for an Azure OpenAI embeddings deployment.
 */
export interface AzureOpenAIEmbeddingConfig {
    /** Azure deployment name (not the model name). */
    deploymentName: string;

    /** Azure OpenAI API key. */
    apiKey: string;

    /** Azure endpoint URL (required). */
    azureEndpoint: string;

    /** API version (optional); a stable version is used when omitted. */
    apiVersion?: string;
}
10+ 
/**
 * Embedding provider that talks to an Azure OpenAI embeddings deployment.
 *
 * Azure addresses models by *deployment name*, which is chosen by the user and
 * may or may not contain the underlying model name. When the deployment name
 * contains one of the names from {@link AzureOpenAIEmbedding.getSupportedModels}
 * the dimension is taken from that table; otherwise it is detected once via a
 * probe request and then cached.
 */
export class AzureOpenAIEmbedding extends Embedding {
    private client: AzureOpenAI;
    private config: AzureOpenAIEmbeddingConfig;
    private dimension: number = 1536; // Default dimension for text-embedding-3-small
    // True once `dimension` reflects the current deployment (table lookup,
    // probe, or a real response). Replaces the old `dimension === 1536`
    // sentinel, which re-probed forever when the real dimension was 1536.
    private dimensionResolved: boolean = false;
    protected maxTokens: number = 8192; // Maximum tokens for OpenAI embedding models

    /**
     * @param config Azure connection settings. The object is copied, so later
     *               calls to {@link setDeployment} do not mutate the caller's value.
     * @throws Error if `azureEndpoint` is missing or does not start with `https://`.
     */
    constructor(config: AzureOpenAIEmbeddingConfig) {
        super();
        this.config = { ...config };

        // Validate endpoint format (also guards against a missing endpoint
        // coming from untyped callers / unset environment variables).
        if (typeof config.azureEndpoint !== 'string' || !config.azureEndpoint.startsWith('https://')) {
            throw new Error('Azure OpenAI endpoint must start with https://');
        }

        // Initialize Azure OpenAI client with API key authentication.
        this.client = new AzureOpenAI({
            apiKey: config.apiKey,
            // `||` (not `??`) on purpose: an empty string also falls back to the default.
            apiVersion: config.apiVersion || '2024-02-01',
            endpoint: config.azureEndpoint,
        });
    }

    /**
     * Determine the embedding dimension of the current deployment and cache it.
     *
     * If the deployment name contains a known model name the table value is
     * returned without any network call; otherwise a single probe embedding
     * request is issued.
     *
     * @param testText Text used for the probe request.
     * @returns The embedding dimension.
     * @throws Error describing authentication, deployment, rate-limit or generic failures.
     */
    async detectDimension(testText: string = "test"): Promise<number> {
        const knownDimension = AzureOpenAIEmbedding.lookupKnownDimension(this.config.deploymentName);
        if (knownDimension !== undefined) {
            this.rememberDimension(knownDimension);
            return knownDimension;
        }

        // Dynamic detection via API call for custom deployments.
        try {
            const processedText = this.preprocessText(testText);
            const response = await this.client.embeddings.create({
                model: this.config.deploymentName, // Use deployment name
                input: processedText,
                encoding_format: 'float',
            });
            const first = response.data[0];
            if (!first) {
                throw new Error('Azure OpenAI response contained no embedding data');
            }
            this.rememberDimension(first.embedding.length);
            return this.dimension;
        } catch (error) {
            throw this.describeFailure(
                error,
                `Failed to detect dimension for Azure deployment ${this.config.deploymentName}`
            );
        }
    }

    /**
     * Embed a single text.
     *
     * @param text Text to embed.
     * @returns The embedding vector together with its dimension.
     * @throws Error describing authentication, deployment, rate-limit or generic failures.
     */
    async embed(text: string): Promise<EmbeddingVector> {
        const processedText = this.preprocessText(text);

        await this.ensureDimension();

        try {
            const response = await this.client.embeddings.create({
                model: this.config.deploymentName, // Use deployment name
                input: processedText,
                encoding_format: 'float',
            });

            const first = response.data[0];
            if (!first) {
                throw new Error('Azure OpenAI response contained no embedding data');
            }

            // Update dimension from actual response.
            this.rememberDimension(first.embedding.length);

            return {
                vector: first.embedding,
                dimension: this.dimension
            };
        } catch (error) {
            throw this.describeFailure(error, 'Failed to generate Azure OpenAI embedding');
        }
    }

    /**
     * Embed several texts with one request.
     *
     * @param texts Texts to embed. An empty array yields an empty result
     *              without contacting the service.
     * @returns One embedding per returned item, in response order.
     * @throws Error describing authentication, deployment, rate-limit or generic failures.
     */
    async embedBatch(texts: string[]): Promise<EmbeddingVector[]> {
        if (texts.length === 0) {
            return [];
        }

        const processedTexts = this.preprocessTexts(texts);

        await this.ensureDimension();

        try {
            const response = await this.client.embeddings.create({
                model: this.config.deploymentName, // Use deployment name
                input: processedTexts,
                encoding_format: 'float',
            });

            const first = response.data[0];
            if (!first) {
                throw new Error('Azure OpenAI response contained no embedding data');
            }

            // Update dimension from actual response.
            this.rememberDimension(first.embedding.length);

            return response.data.map((item) => ({
                vector: item.embedding,
                dimension: this.dimension
            }));
        } catch (error) {
            throw this.describeFailure(error, 'Failed to generate Azure OpenAI batch embeddings');
        }
    }

    /**
     * Return the embedding dimension without performing any I/O.
     *
     * For deployments whose name contains a known model name the table value
     * is returned. For custom deployments the cached value is returned; a
     * warning is logged only while that value is still the unverified default.
     */
    getDimension(): number {
        const knownDimension = AzureOpenAIEmbedding.lookupKnownDimension(this.config.deploymentName);
        if (knownDimension !== undefined) {
            return knownDimension;
        }

        if (!this.dimensionResolved) {
            // The cached value has not been confirmed for this deployment yet.
            console.warn(`[AzureOpenAIEmbedding] ⚠️ getDimension() called for deployment '${this.config.deploymentName}' - returning ${this.dimension}. Call detectDimension() first for accurate dimension.`);
        }
        return this.dimension;
    }

    /** Human-readable provider name. */
    getProvider(): string {
        return 'Azure OpenAI';
    }

    /**
     * Set deployment name and refresh the cached dimension for it.
     *
     * @param deploymentName Azure deployment name
     * @throws Error if the dimension of a custom deployment cannot be detected;
     *         the new deployment name is still in effect in that case and the
     *         dimension will be re-detected on the next embed call.
     */
    async setDeployment(deploymentName: string): Promise<void> {
        this.config.deploymentName = deploymentName;
        // Whatever was cached belonged to the previous deployment.
        this.dimensionResolved = false;

        const knownDimension = AzureOpenAIEmbedding.lookupKnownDimension(deploymentName);
        if (knownDimension !== undefined) {
            this.rememberDimension(knownDimension);
            return;
        }

        // Detect dimension for custom deployment.
        this.rememberDimension(await this.detectDimension());
    }

    /**
     * Get client instance (for advanced usage)
     */
    getClient(): AzureOpenAI {
        return this.client;
    }

    /**
     * Get list of supported models (these are OpenAI model names, not Azure deployment names)
     * Azure deployments can be named anything, but often include the model name
     */
    static getSupportedModels(): Record<string, { dimension: number; description: string }> {
        return {
            'text-embedding-3-small': {
                dimension: 1536,
                description: 'High performance and cost-effective embedding model (recommended)'
            },
            'text-embedding-3-large': {
                dimension: 3072,
                description: 'Highest performance embedding model with larger dimensions'
            },
            'text-embedding-ada-002': {
                dimension: 1536,
                description: 'Legacy model (use text-embedding-3-small instead)'
            }
        };
    }

    /**
     * Look up the dimension of a known model whose name appears inside the
     * given deployment name (case-insensitive, dots treated as dashes).
     *
     * @returns The table dimension, or `undefined` for custom deployment names.
     */
    private static lookupKnownDimension(deploymentName: string): number | undefined {
        const normalizedName = deploymentName.toLowerCase();
        for (const [modelName, info] of Object.entries(AzureOpenAIEmbedding.getSupportedModels())) {
            // Azure deployment names cannot contain dots, so compare against the dashed form.
            const modelPattern = modelName.replace(/\./g, '-');
            if (normalizedName.includes(modelPattern)) {
                return info.dimension;
            }
        }
        return undefined;
    }

    /** Extract a numeric HTTP `status` from an SDK error, if it carries one. */
    private static httpStatusOf(error: unknown): number | undefined {
        if (typeof error === 'object' && error !== null && 'status' in error) {
            const status = (error as { status?: unknown }).status;
            return typeof status === 'number' ? status : undefined;
        }
        return undefined;
    }

    /** Cache a dimension that is known to be valid for the current deployment. */
    private rememberDimension(dimension: number): void {
        this.dimension = dimension;
        this.dimensionResolved = true;
    }

    /** Make sure `dimension` is resolved for the current deployment, probing at most once. */
    private async ensureDimension(): Promise<void> {
        if (this.dimensionResolved) {
            return;
        }
        const knownDimension = AzureOpenAIEmbedding.lookupKnownDimension(this.config.deploymentName);
        if (knownDimension !== undefined) {
            this.rememberDimension(knownDimension);
            return;
        }
        this.rememberDimension(await this.detectDimension());
    }

    /**
     * Translate a low-level failure into an Error with an Azure-specific message.
     *
     * The HTTP status is consulted first when the error carries one; message
     * keywords (matched case-insensitively) are the fallback. Rate-limit
     * keywords are checked before the broad `deployment` / `not found` match
     * so that the latter cannot shadow them.
     *
     * @param error          The caught value.
     * @param fallbackPrefix Message prefix used when no specific category applies.
     */
    private describeFailure(error: unknown, fallbackPrefix: string): Error {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        const status = AzureOpenAIEmbedding.httpStatusOf(error);
        const lowered = errorMessage.toLowerCase();

        const isAuthFailure =
            status === 401 ||
            status === 403 ||
            lowered.includes('api key') ||
            lowered.includes('unauthorized') ||
            lowered.includes('authentication');
        if (isAuthFailure) {
            return new Error(`Azure OpenAI authentication failed: ${errorMessage}`);
        }

        const isRateLimited =
            status === 429 ||
            lowered.includes('rate limit') ||
            lowered.includes('quota');
        if (isRateLimited) {
            return new Error(`Azure OpenAI rate limit exceeded: ${errorMessage}`);
        }

        const isMissingDeployment =
            status === 404 ||
            lowered.includes('deployment') ||
            lowered.includes('not found');
        if (isMissingDeployment) {
            return new Error(`Azure OpenAI deployment '${this.config.deploymentName}' not found: ${errorMessage}`);
        }

        return new Error(`${fallbackPrefix}: ${errorMessage}`);
    }
}
0 commit comments