Commit

moved scripts
peytontolbert committed Jan 30, 2024
1 parent 8457c8a commit a46c4f2
Showing 22 changed files with 341,019 additions and 63,939 deletions.
9 changes: 4 additions & 5 deletions example.py
@@ -1,13 +1,12 @@
 from vision_datasets.iter_over_dataset import ImageDatasetIterator
 from swarms import QwenVLMultiModal
 
-model = QwenVLMultiModal(system_prompt="You, as the model, are presented with a visual problem. This could be an image containing various elements that you need to analyze, a graph that requires interpretation, or a visual puzzle. Your task is to examine the visual information carefully and describe your process of understanding and solving the problem.",)
+model = QwenVLMultiModal(
+    system_prompt="You, as the model, are presented with a visual problem. This could be an image containing various elements that you need to analyze, a graph that requires interpretation, or a visual puzzle. Your task is to examine the visual information carefully and describe your process of understanding and solving the problem.",
+)
 
-iterator = ImageDatasetIterator(
-    "coco",
-    model
-)
+iterator = ImageDatasetIterator("coco", model)
 
 
 # Run the iterator
 iterator.run()
40 changes: 40 additions & 0 deletions scripts/addid.py
@@ -0,0 +1,40 @@
import json


def create_image_json(response_file, images_file, output_file):
    # Load the response data
    with open(response_file, "r") as f:
        response_data = json.load(f)

    # Create a mapping of image names to IDs
    image_id_map = {
        entry.get("image_name", "").split("/")[-1]: entry["id"]
        for entry in response_data
    }

    # Read the image file names from processed_images.txt
    with open(images_file, "r") as f:
        image_names = [line.strip() for line in f]

    # Create a list to store the new JSON structure
    image_json_data = []

    # Map the image names to their IDs
    for image_name in image_names:
        if image_name in image_id_map:
            image_json_data.append({
                "image_name": image_name,
                "id": image_id_map[image_name],
            })

    # Write the data to a new JSON file
    with open(output_file, "w") as f:
        json.dump(image_json_data, f, indent=4)


# Call the function with your file names
create_image_json(
    "final_combined_responses.json",
    "processed_images.txt",
    "image_id_mapping.json",
)
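
For context, a minimal sketch of the data shapes this script assumes; the file names match the call above, but the entries shown are hypothetical, not taken from the actual files:

# final_combined_responses.json: a list of entries whose "image_name"
# may carry a directory prefix, which the script strips with split("/")[-1].
response_data = [
    {"id": 1, "image_name": "train2017/000000000009.jpg", "response": "..."},
]

# processed_images.txt: one bare file name per line, e.g.
#     000000000009.jpg

# image_id_mapping.json: the output pairs each bare name with its id.
image_json_data = [
    {"image_name": "000000000009.jpg", "id": 1},
]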
2,192 changes: 0 additions & 2,192 deletions scripts/functions.json

This file was deleted.

37 changes: 0 additions & 37 deletions scripts/functionstest.json

This file was deleted.

161 changes: 161 additions & 0 deletions scripts/gguf.py
@@ -0,0 +1,161 @@
import torch
import json
from transformers.generation import GenerationConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextStreamer,
)
from llama_cpp import Llama
import re
import os
import concurrent.futures
import threading
from queue import Queue

# Create a lock object
lock = threading.Lock()

model_name_or_path = "TheBloke/dolphin-2.6-mistral-7B-dpo-laser-AWQ"
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, use_fast=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, low_cpu_mem_usage=True, device_map="cuda:0"
)

# Using the text streamer to stream output one token at a time
streamer = TextStreamer(
    tokenizer, skip_prompt=True, skip_special_tokens=True
)

# File to store the responses
functions_file = "functions.json"


def expand_qa(features):
    prompt = f"""{features}"""
    system_message = """When presented with features described by a visual language model, synthesize a function call and generate its output. The function call should be structured to capture specific attributes of the image as detailed by the visual description. Start the function call with the <fn_call> tag and then provide the expected output in JSON format.
[INSTRUCTION]
1. Based on the visual description, create a structured function call under the <fn_call> tag.
2. Generate the expected output of the function call as if it were executed.
[EXAMPLE]
Visual Description: 'A red and white bus with an advertisement on the back, driving through the city streets.'
Synthesized Function Call and Output:
[FUNCTION CALL]
{{
'type': 'object',
'properties': {{
'bus_colors': {{
'type': 'array',
'description': 'The colors of the bus in the image.',
'items': {{
'type': 'string',
'enum': ['red', 'white']
}}
}},
'bus_features': {{
'type': 'string',
'description': 'The features seen on the back of the bus.',
'enum': ['advertisement']
}},
'bus_location': {{
'type': 'string',
'description': 'The location of the bus.',
'enum': ['driving']
}}
}}
}}
[EXPECTED OUTPUT]
{{
'bus_colors': ['red', 'white'],
'bus_features': 'advertisement',
'bus_location': 'driving'
}}
"""
    prompt_template = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""
print("before model")
input_ids = tokenizer(
prompt_template, return_tensors="pt"
).input_ids.cuda()
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": prompt},
]
generation_params = {
"do_sample": True,
"temperature": 0.7,
"top_p": 0.95,
"top_k": 40,
"max_new_tokens": 512,
"repetition_penalty": 1.1,
}
try:
outputs = model.generate(
input_ids, streamer=streamer, **generation_params
)
print(outputs)
except Exception as e:
print(f"Error during model processing: {e}")
return "Error processing response"
    # Decode the generated tokens to a string (the regex below needs
    # text, not the tensor of token ids returned by generate)
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Use regex to find everything after "assistant"
    match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
    if match:
        # Extract everything after "assistant"
        response = match.group(1)
    else:
        response = "No response found after 'assistant'."

    print(response)
    return response


def save_response(data):
    with lock:  # Acquire the lock before accessing the file
        if os.path.exists(functions_file):
            with open(functions_file, "r+") as file:
                file_data = json.load(file)
                file_data.append(data)
                file.seek(0)
                json.dump(file_data, file, indent=4)
        else:
            with open(functions_file, "w") as file:
                json.dump([data], file, indent=4)


def process_responses(file_path, output_file_path):
    with open(file_path, "r") as file:
        data = json.load(file)

    def process_item(item):
        features = item.get("response", "")
        output = expand_qa(features)
        # Add the new response to the original object
        item["new_response"] = output
        save_response(item)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_item, data)

    return data


# Process the responses.json file
updated_data = process_responses("responses.json", "functions.json")


print("Data saved in functions.json")
131 changes: 131 additions & 0 deletions scripts/phi.py
@@ -0,0 +1,131 @@
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import re
import os
import concurrent.futures
import threading
from queue import Queue

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create a lock object
lock = threading.Lock()
model_name_or_path = "cognitivecomputations/dolphin-2_6-phi-2"
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, torch_dtype="auto", trust_remote_code=True
)
# Move the model to the device computed above (it was otherwise unused)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, trust_remote_code=True
).to(device)

# File to store the responses
functions_file = "functions.json"


def expand_qa(features):
    prompt = f"""{features}"""
    system_message = """When presented with features described by a visual language model, synthesize a function call and generate its output. The function call should be structured to capture specific attributes of the image as detailed by the visual description. Start the function call with the <fn_call> tag and then provide the expected output in JSON format.
[INSTRUCTION]
1. Based on the visual description, create a structured function call under the <fn_call> tag.
2. Generate the expected output of the function call as if it were executed.
[EXAMPLE]
Visual Description: 'A red and white bus with an advertisement on the back, driving through the city streets.'
Synthesized Function Call and Output:
[FUNCTION CALL]
{{
'type': 'object',
'properties': {{
'bus_colors': {{
'type': 'array',
'description': 'The colors of the bus in the image.',
'items': {{
'type': 'string',
'enum': ['red', 'white']
}}
}},
'bus_features': {{
'type': 'string',
'description': 'The features seen on the back of the bus.',
'enum': ['advertisement']
}},
'bus_location': {{
'type': 'string',
'description': 'The location of the bus.',
'enum': ['driving']
}}
}}
}}
[EXPECTED OUTPUT]
{{
'bus_colors': ['red', 'white'],
'bus_features': 'advertisement',
'bus_location': 'driving'
}}
"""
    prompt_template = f"""<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""
    input_ids = tokenizer(prompt_template, return_tensors="pt").to(device)
    # Cap the number of newly generated tokens; max_length would count the
    # long prompt itself and could truncate or suppress the generation
    outputs = model.generate(**input_ids, max_new_tokens=512)
    # Decode the generated tokens to a string
    full_response = tokenizer.decode(
        outputs[0], skip_special_tokens=True
    )
    # Use regex to find everything after "assistant"
    match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
    if match:
        # Extract everything after "assistant"
        response = match.group(1)
    else:
        response = "No response found after 'assistant'."

    print(response)
    return response


def save_response(data):
    with lock:  # Acquire the lock before accessing the file
        if os.path.exists(functions_file):
            with open(functions_file, "r+") as file:
                file_data = json.load(file)
                file_data.append(data)
                file.seek(0)
                json.dump(file_data, file, indent=4)
        else:
            with open(functions_file, "w") as file:
                json.dump([data], file, indent=4)


def process_responses(file_path, output_file_path):
    with open(file_path, "r") as file:
        data = json.load(file)

    def process_item(item):
        features = item.get("response", "")
        output = expand_qa(features)
        # Add the new response to the original object
        item["new_response"] = output
        save_response(item)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_item, data)

    return data


# Process the responses.json file
updated_data = process_responses("responses.json", "functions.json")


print("Data saved in functions.json")
File renamed without changes.
