#date: 2024-08-23T17:10:11Z
#url: https://api.github.com/gists/7d4a6d6169f2c16a28bb994df7d3fd7e
#owner: https://api.github.com/users/TheMasterFX

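"""Benchmark a set of prompts against several local LLMs served through an
Ollama OpenAI-compatible endpoint. Answers are cached in a CSV so interrupted
runs can resume: only empty (prompt, model) cells are queried again."""
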
import pandas as pd
from openai import OpenAI
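# Note: the Excel export at the end additionally requires the openpyxl package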

OPENAI_API_BASE = 'http://192.168.178.34:11434/v1'
OPENAI_MODEL_NAME = 'dolphin-mistral:latest'  # Adjust based on available model
OPENAI_API_KEY = 'IHAVENOKEY'

# Create the client; the local Ollama endpoint does not check the key (hence the placeholder),
# but the OpenAI client requires one to be set
client = OpenAI(base_url=OPENAI_API_BASE, api_key=OPENAI_API_KEY)

system_prompt = "You are a helpful assistant."

# Define prompts
prompts = [
    "There are three killers in a room. Someone enters the room and kills one of them. Nobody leaves the room. How many killers are left in the room? Explain your reasoning step by step.",
    "What is 20 + 4*3 - 2?",
    "Which number is bigger: 9.11 or 9.9?",
    "What is the diameter of the earth?",
    "What is the diameter of Mars?",
    "What is the diameter of the sun?",
    "Give me this sequence in reverse: fpoiidnooi",
    "I have 2 apples, then I buy 2 more. I bake a pie with 2 of the apples. After eating half of the pie how many apples do I have left?",
    "Is it acceptable to gently push a random person if it could save humanity from extinction?",
    "Dies ist ein Test in Deutsch. Beschreibe die Relativitätstheorie in Worten für ein 6-jähriges Kind!"
    # Add more prompts...
]

def build_conversation(user_message):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message}
    ]

# Define LLMs
models = {
    "Dolphin-Mistral": "dolphin-mistral:latest",
    "Dolphin-Llama3": "dolphin-llama3:latest",
    "Phi-3 3.8B": "phi3:latest",
    "Phi-3.5 3.8B": "phi3.5:latest",
    "Llama3 8B": "llama3:latest",
    "Gemma 2 9B": "gemma2:latest",
    "Qwen 0.5B": "qwen2:0.5b",
    "Llama3.1 8B": "llama3.1:latest"
    # Add more models...
}
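
# The display names above become column headers in the results file; renaming one
# later adds a fresh, empty column rather than reusing previously saved answers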

# Load previously saved results if present; otherwise start a fresh DataFrame
try:
    results = pd.read_csv("llm_benchmark_results.csv", sep=';', index_col="Prompt")
except FileNotFoundError:
    results = pd.DataFrame(index=prompts, columns=models.keys())

# Add new prompts if they are not already in the DataFrame
new_prompts = [prompt for prompt in prompts if prompt not in results.index]
for prompt in new_prompts:
    results.loc[prompt] = None

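# Likewise add a column for any model that isn't in the saved results yet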
new_models = [model for model in models if model not in results.columns]
for model in new_models:
    results[model] = None

# Loop through prompts and models
for prompt in prompts:
    for model_name, model_id in models.items():
        # Skip if the result for this prompt and model is already calculated
        if pd.notna(results.at[prompt, model_name]):
            continue
        conversation = build_conversation(prompt)
        # Generate text using the OpenAI API
        generated_text = client.chat.completions.create(
            model=model_id,
            messages=conversation,
            max_tokens=1024  # assumed cap; the original value was redacted in the source
        ).choices[0].message.content

        # Store result in DataFrame
        results.at[prompt, model_name] = generated_text

# Export results to CSV and Excel
results.to_csv("llm_benchmark_results.csv", sep=';', index_label="Prompt")
results.to_excel("llm_benchmark_results.xlsx", index_label="Prompt")
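
# To inspect cached answers in a later session, reload with the same separator and index:
# results = pd.read_csv("llm_benchmark_results.csv", sep=';', index_col="Prompt")
# print(results.loc[prompts[0]])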