generated from kyegomez/Python-Package-Template
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1 parent 8457c8a · commit a46c4f2
Showing 22 changed files with 341,019 additions and 63,939 deletions.
@@ -1,13 +1,12 @@
 from vision_datasets.iter_over_dataset import ImageDatasetIterator
 from swarms import QwenVLMultiModal

-model = QwenVLMultiModal(system_prompt="You, as the model, are presented with a visual problem. This could be an image containing various elements that you need to analyze, a graph that requires interpretation, or a visual puzzle. Your task is to examine the visual information carefully and describe your process of understanding and solving the problem.",)
-
-iterator = ImageDatasetIterator(
-    "coco",
-    model
-)
+model = QwenVLMultiModal(
+    system_prompt="You, as the model, are presented with a visual problem. This could be an image containing various elements that you need to analyze, a graph that requires interpretation, or a visual puzzle. Your task is to examine the visual information carefully and describe your process of understanding and solving the problem.",
+)
+
+iterator = ImageDatasetIterator("coco", model)


 # Run the iterator
 iterator.run()
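The change above is purely a formatting refactor: the same QwenVLMultiModal model and the same "coco" dataset are wired together, just in conventional style. For readers unfamiliar with the pattern, here is a minimal, hypothetical stand-in for the iterate-and-describe loop that ImageDatasetIterator presumably implements (the real class lives in vision_datasets.iter_over_dataset; the method names below are illustrative assumptions, not its actual API):

# Hypothetical sketch of the iterate-and-describe pattern; not the real class.
class ToyImageDatasetIterator:
    def __init__(self, dataset_name, model):
        self.dataset_name = dataset_name  # e.g. "coco"
        self.model = model  # assumed to expose a run(task, image) method

    def run(self):
        # Walk the dataset and ask the model to describe each image
        for image_path in self._image_paths():
            description = self.model.run("Describe this image.", image_path)
            print(image_path, "->", description)

    def _image_paths(self):
        # Placeholder paths; a real iterator would walk the downloaded dataset
        return ["coco/000000000001.jpg", "coco/000000000002.jpg"]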
@@ -0,0 +1,40 @@
import json


def create_image_json(response_file, images_file, output_file):
    # Load the response data
    with open(response_file, "r") as f:
        response_data = json.load(f)

    # Create a mapping of image names to IDs
    image_id_map = {
        entry.get("image_name", "").split("/")[-1]: entry["id"]
        for entry in response_data
    }

    # Read the image file names from processed_images.txt
    with open(images_file, "r") as f:
        image_names = [line.strip() for line in f]

    # Create a list to store the new JSON structure
    image_json_data = []

    # Map the image names to their IDs
    for image_name in image_names:
        if image_name in image_id_map:
            image_json_data.append({
                "image_name": image_name,
                "id": image_id_map[image_name],
            })

    # Write the data to a new JSON file
    with open(output_file, "w") as f:
        json.dump(image_json_data, f, indent=4)


# Call the function with your file names
create_image_json(
    "final_combined_responses.json",
    "processed_images.txt",
    "image_id_mapping.json",
)
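A quick way to sanity-check create_image_json is to run it on tiny toy inputs. Everything below (file names and records alike) is illustrative, not real pipeline output:

import json

# Toy inputs: illustrative records, not real pipeline output
with open("toy_responses.json", "w") as f:
    json.dump(
        [
            {"image_name": "coco/val2017/000000000139.jpg", "id": 139},
            {"image_name": "coco/val2017/000000000285.jpg", "id": 285},
        ],
        f,
    )
with open("toy_images.txt", "w") as f:
    f.write("000000000139.jpg\n000000000285.jpg\n")

create_image_json("toy_responses.json", "toy_images.txt", "toy_mapping.json")

with open("toy_mapping.json") as f:
    print(json.load(f))
# [{'image_name': '000000000139.jpg', 'id': 139},
#  {'image_name': '000000000285.jpg', 'id': 285}]

Note that the mapping keys are basenames (split("/")[-1]), so the names listed in processed_images.txt must also be bare file names for a match.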
This file was deleted.
This file was deleted.
@@ -0,0 +1,161 @@
import json
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextStreamer,
)
import re
import os
import concurrent.futures
import threading

# Create a lock object to serialize writes to the shared output file
lock = threading.Lock()

model_name_or_path = "TheBloke/dolphin-2.6-mistral-7B-dpo-laser-AWQ"
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, use_fast=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, low_cpu_mem_usage=True, device_map="cuda:0"
)

# Use the text streamer to print output one token at a time
streamer = TextStreamer(
    tokenizer, skip_prompt=True, skip_special_tokens=True
)

# File to store the responses
functions_file = "functions.json"


def expand_qa(features):
    prompt = f"""{features}"""
    system_message = """When presented with features described by a visual language model, synthesize a function call and generate its output. The function call should be structured to capture specific attributes of the image as detailed by the visual description. Start the function call with the <fn_call> tag and then provide the expected output in JSON format.
[INSTRUCTION]
1. Based on the visual description, create a structured function call under the <fn_call> tag.
2. Generate the expected output of the function call as if it were executed.
[EXAMPLE]
Visual Description: 'A red and white bus with an advertisement on the back, driving through the city streets.'
Synthesized Function Call and Output:
[FUNCTION CALL]
{
    'type': 'object',
    'properties': {
        'bus_colors': {
            'type': 'array',
            'description': 'The colors of the bus in the image.',
            'items': {
                'type': 'string',
                'enum': ['red', 'white']
            }
        },
        'bus_features': {
            'type': 'string',
            'description': 'The features seen on the back of the bus.',
            'enum': ['advertisement']
        },
        'bus_location': {
            'type': 'string',
            'description': 'The location of the bus.',
            'enum': ['driving']
        }
    }
}
[EXPECTED OUTPUT]
{
    'bus_colors': ['red', 'white'],
    'bus_features': 'advertisement',
    'bus_location': 'driving'
}
"""
    prompt_template = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""
    input_ids = tokenizer(
        prompt_template, return_tensors="pt"
    ).input_ids.cuda()
    generation_params = {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 40,
        "max_new_tokens": 512,
        "repetition_penalty": 1.1,
    }
    try:
        outputs = model.generate(
            input_ids, streamer=streamer, **generation_params
        )
    except Exception as e:
        print(f"Error during model processing: {e}")
        return "Error processing response"
    # Decode the generated tokens to a string before searching it
    full_response = tokenizer.decode(
        outputs[0], skip_special_tokens=True
    )
    # Use regex to find everything after "assistant"
    match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
    if match:
        # Extract everything after "assistant"
        response = match.group(1)
    else:
        response = "No response found after 'assistant'."

    print(response)
    return response


def save_response(data):
    with lock:  # Acquire the lock before accessing the file
        if os.path.exists(functions_file):
            with open(functions_file, "r+") as file:
                file_data = json.load(file)
                file_data.append(data)
                file.seek(0)
                json.dump(file_data, file, indent=4)
                file.truncate()  # Drop any stale bytes past the new end
        else:
            with open(functions_file, "w") as file:
                json.dump([data], file, indent=4)


def process_responses(file_path, output_file_path):
    with open(file_path, "r") as file:
        data = json.load(file)

    def process_item(item):
        features = item.get("response", "")
        output = expand_qa(features)
        # Add the new response to the original object
        item["new_response"] = output
        save_response(item)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_item, data)

    return data


# Process the responses.json file
updated_data = process_responses("responses.json", "functions.json")

print("Data saved in functions.json")
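The extraction step at the end of expand_qa can be verified in isolation. The decoded string below is a made-up stand-in for a real tokenizer.decode result in the ChatML-style layout the prompt template produces:

import re

# Made-up decoded output; special tokens are already stripped by the decoder
full_response = (
    "system\n<system message here>\n"
    "user\nA red and white bus with an advertisement.\n"
    "assistant\n<fn_call> {'type': 'object', ...}"
)

match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
print(match.group(1) if match else "No response found after 'assistant'.")
# prints: <fn_call> {'type': 'object', ...}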
@@ -0,0 +1,131 @@
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
import os
import concurrent.futures
import threading

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create a lock object to serialize writes to the shared output file
lock = threading.Lock()
model_name_or_path = "cognitivecomputations/dolphin-2_6-phi-2"
# Load the model and tokenizer, keeping the model on the selected device
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, torch_dtype="auto", trust_remote_code=True
).to(device)

# File to store the responses
functions_file = "functions.json"


def expand_qa(features):
    prompt = f"""{features}"""
    system_message = """When presented with features described by a visual language model, synthesize a function call and generate its output. The function call should be structured to capture specific attributes of the image as detailed by the visual description. Start the function call with the <fn_call> tag and then provide the expected output in JSON format.
[INSTRUCTION]
1. Based on the visual description, create a structured function call under the <fn_call> tag.
2. Generate the expected output of the function call as if it were executed.
[EXAMPLE]
Visual Description: 'A red and white bus with an advertisement on the back, driving through the city streets.'
Synthesized Function Call and Output:
[FUNCTION CALL]
{
    'type': 'object',
    'properties': {
        'bus_colors': {
            'type': 'array',
            'description': 'The colors of the bus in the image.',
            'items': {
                'type': 'string',
                'enum': ['red', 'white']
            }
        },
        'bus_features': {
            'type': 'string',
            'description': 'The features seen on the back of the bus.',
            'enum': ['advertisement']
        },
        'bus_location': {
            'type': 'string',
            'description': 'The location of the bus.',
            'enum': ['driving']
        }
    }
}
[EXPECTED OUTPUT]
{
    'bus_colors': ['red', 'white'],
    'bus_features': 'advertisement',
    'bus_location': 'driving'
}
"""
    prompt_template = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""
    # Move the inputs to the same device as the model
    inputs = tokenizer(prompt_template, return_tensors="pt").to(device)
    # max_new_tokens bounds the generated text; max_length would also count
    # the long prompt and could cut generation off immediately
    outputs = model.generate(**inputs, max_new_tokens=512)
    # Decode the generated tokens to a string
    full_response = tokenizer.decode(
        outputs[0], skip_special_tokens=True
    )
    # Use regex to find everything after "assistant"
    match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
    if match:
        # Extract everything after "assistant"
        response = match.group(1)
    else:
        response = "No response found after 'assistant'."

    print(response)
    return response


def save_response(data):
    with lock:  # Acquire the lock before accessing the file
        if os.path.exists(functions_file):
            with open(functions_file, "r+") as file:
                file_data = json.load(file)
                file_data.append(data)
                file.seek(0)
                json.dump(file_data, file, indent=4)
                file.truncate()  # Drop any stale bytes past the new end
        else:
            with open(functions_file, "w") as file:
                json.dump([data], file, indent=4)


def process_responses(file_path, output_file_path):
    with open(file_path, "r") as file:
        data = json.load(file)

    def process_item(item):
        features = item.get("response", "")
        output = expand_qa(features)
        # Add the new response to the original object
        item["new_response"] = output
        save_response(item)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_item, data)

    return data


# Process the responses.json file
updated_data = process_responses("responses.json", "functions.json")

print("Data saved in functions.json")
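To check the JSON bookkeeping without loading a model at all, you can stub out expand_qa and push one toy record through the same steps process_item performs. The record and the stub below are illustrative only:

# Illustrative dry run of the per-item flow with a stubbed model call
def fake_expand_qa(features):
    # Stand-in for the real expand_qa; returns a canned string
    return f"<fn_call> synthesized from: {features}"

item = {"id": 1, "response": "A red and white bus with an advertisement."}
item["new_response"] = fake_expand_qa(item["response"])  # as in process_item
save_response(item)  # appends the record to functions.json under the lock
print(item["new_response"])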
File renamed without changes.