From 8e36c713cda534b57fcc8e6bd4af75a26f0e9be9 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Sun, 29 Sep 2024 21:22:37 +0200 Subject: [PATCH] Use chat template --- Applications/LLMEval/ContentView.swift | 11 +++++------ Libraries/LLM/Models.swift | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Applications/LLMEval/ContentView.swift b/Applications/LLMEval/ContentView.swift index 2e76d14..970caba 100644 --- a/Applications/LLMEval/ContentView.swift +++ b/Applications/LLMEval/ContentView.swift @@ -159,7 +159,8 @@ class LLMEvaluator { /// This controls which model loads. `phi3_5_4bit` is one of the smaller ones, so this will fit on /// more devices. - let modelConfiguration = ModelConfiguration.phi3_5_4bit + // let modelConfiguration = ModelConfiguration.phi3_5_4bit + let modelConfiguration = ModelConfiguration.mistral7B4bit /// parameters controlling the output let generateParameters = GenerateParameters(temperature: 0.6) @@ -212,11 +213,9 @@ class LLMEvaluator { do { let modelContainer = try await load() - // augment the prompt as needed - let prompt = modelConfiguration.prepare(prompt: prompt) - - let promptTokens = await modelContainer.perform { _, tokenizer in - tokenizer.encode(text: prompt) + let messages = [["role": "user", "content": prompt]] + let promptTokens = try await modelContainer.perform { _, tokenizer in + try tokenizer.applyChatTemplate(messages: messages) } // each time you generate you will get something new diff --git a/Libraries/LLM/Models.swift b/Libraries/LLM/Models.swift index db44a91..effa841 100644 --- a/Libraries/LLM/Models.swift +++ b/Libraries/LLM/Models.swift @@ -130,7 +130,8 @@ extension ModelConfiguration { public static let mistral7B4bit = ModelConfiguration( id: "mlx-community/Mistral-7B-Instruct-v0.3-4bit", - defaultPrompt: "Describe the Swift language." + defaultPrompt: "Describe the Swift language.", + extraEOSTokens: ["<|im_end|>"] // !! 
FIXME: "<|im_end|>" is the ChatML end-of-turn marker, not Mistral's EOS — the v0.3 tokenizer already ends turns with "</s>", so this extra token appears to have no effect; verify and consider removing it ) { prompt in "[INST] \(prompt) [/INST] " }