diff --git a/package.json b/package.json index a8ee66b..80051d3 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ }, "dependencies": { "@frost-beta/mlx": "0.0.23", - "@lenml/tokenizers": "1.1.2", + "@lenml/tokenizers": "3.0.1", "sharp": "0.33.5" } } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index fe1d010..5f1d040 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -46,9 +46,9 @@ export class Tokenizer { this.removeWhiteSpaceStripper(); // Get EOS token. const {tokens_to_ids} = this.tokenizer.model; - this.eosToken = tokens_to_ids.get(this.tokenizer.getToken('eos_token')); + this.eosToken = tokens_to_ids.get(tokenizerConfig.eos_token); // Some models do not have a BOS token, they use EOS instead. - this.bosToken = tokens_to_ids.get(this.tokenizer.getToken('bos_token')) ?? this.eosToken; + this.bosToken = tokens_to_ids.get(tokenizerConfig.bos_token) ?? this.eosToken; } encode(text: string) { @@ -67,15 +67,12 @@ export class Tokenizer { // Get the automatically inserted system prompt by passing empty messages. const systemPrompt = this.tokenizer.apply_chat_template([], { add_generation_prompt: false, - tools: null, - } as unknown) as number[]; + }) as number[]; this.systemPromptLength = systemPrompt.length; } const tokens = this.tokenizer.apply_chat_template(messages, { add_generation_prompt: true, - // https://github.com/xenova/transformers.js/issues/879 - tools: null, - } as unknown) as number[]; + }) as number[]; if (trimSystemPrompt) return tokens.slice(this.systemPromptLength); return tokens;