multiprocess
peytontolbert committed Jan 31, 2024
1 parent 3b75f66 commit 98dd00e
Showing 5 changed files with 13,931 additions and 172,099 deletions.
25 changes: 25 additions & 0 deletions vision_datasets/Dockerfile
@@ -0,0 +1,25 @@
# Use an official PyTorch image as a parent image
FROM pytorch/pytorch:latest

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy the current directory contents into the container at /usr/src/app
COPY . /usr/src/app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Make port 29500 available to the world outside this container
# This port can be used for the PyTorch distributed process group
EXPOSE 29500

# Define environment variable
ENV NAME World
# Define environment variables for distributed training
ENV MASTER_ADDR="localhost"
ENV MASTER_PORT="29500"
ENV NCCL_SOCKET_IFNAME=^docker0,lo

# Run app.py when the container launches
CMD ["python", "functioncallgenerate_mp_mt.py"]
vision_datasets/functioncallgenerate_mp_mt.py
@@ -2,39 +2,43 @@
import json
import torch.distributed as dist
import torch.multiprocessing as mp
from multiprocessing import Process, Queue, set_start_method
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import re
import os
os.environ['MASTER_ADDR'] = 'localhost' # Use the appropriate master address
os.environ['MASTER_PORT'] = '29500' # Use an appropriate port number

torch.distributed.is_available()
# File to store the responses
functions_file = "functions.json"
features_file = "responses.json"

device = torch.set_default_device("cuda")
# Create a lock object
model_name_or_path = (
    "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser"
)
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    fast_attention2=True,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map=("cuda:0"),
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_safetensors=True
)

# File to store the responses
functions_file = "functions.json"
rank = 2
world_size = 2
# Initialization of the model and tokenizer should be done inside the function that runs on each process to ensure they are correctly mapped to the respective device (GPU).
def setup_model_and_tokenizer(device):
    model_name_or_path = "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser"
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.float16,
        fast_attention2=True,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        use_safetensors=True
    )
    model.to(device)
    return tokenizer, model

def expand_qa(features, rank, world_size):
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    prompt = f"""{features}"""

def generate_response(tokenizer, model, features):
    prompt = features
    system_message = """When presented with features described by a visual language model, synthesize a function call and generate its output. The function call should be structured to capture specific attributes of the image as detailed by the visual description. Start the function call with the <fn_call> tag and then provide the expected output in JSON format.
[INSTRUCTION]
@@ -97,6 +101,7 @@ def expand_qa(features, rank, world_size):
    full_response = tokenizer.decode(
        outputs[0], skip_special_tokens=True
    )

    # Use regex to find everything after "assistant"
    match = re.search(r"assistant\s*(.*)", full_response, re.DOTALL)
    if match:
@@ -105,7 +110,24 @@ def expand_qa(features, rank, world_size):
        ) # Extract everything after "assistant"
    else:
        response = "No response found after 'assistant'."
    return response


def expand_qa(rank, features_with_ids, output_queue, world_size):
    torch.cuda.set_device(rank)  # Set the current device to the specified GPU
    device = torch.device(f"cuda:{rank}")
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    tokenizer, model = setup_model_and_tokenizer(device)
    print(f"Process {rank} is running on GPU {rank}")
    for feature in features_with_ids:
        if rank == (feature["id"] % world_size):
            response = generate_response(tokenizer, model, feature["response"])
            output_queue.put({"id": feature["id"], "response": response})
            # Save the response
    if rank == 0:
        for _ in range(world_size):
            output_queue.put("DONE")
    dist.destroy_process_group()
    return response


@@ -147,16 +169,50 @@ def process_responses(file_path, output_file_path):
save_response(item)
return data

def run_inference(rank, world_size):
    # Process the responses.json file
    updated_data = process_responses("responses.json", "functions.json")

    print("Data saved in functions.json")

def load_features(features_path, responses_path):
    # Load the features
    with open(features_path, 'r') as f:
        features = json.load(f)
    processed_ids = set()

    # Load the processed responses
    try:
        with open(responses_path, 'r') as f:
            responses = json.load(f)
        # Extract the IDs of processed features
        processed_ids = {response["id"] for response in responses}
    except FileNotFoundError:
        # If the responses file doesn't exist, no IDs have been processed
        processed_ids = set()

    # Filter out features that have already been processed
    new_features = [feature for feature in features if feature["id"] not in processed_ids]

    return new_features

def write_outputs_from_queue(output_queue, functions_path):
    with open(functions_path, 'a') as f:
        while True:
            output = output_queue.get()  # This will block until an item is available
            if output == "DONE":
                break  # Break the loop if a "DONE" signal is received
            f.write(json.dumps(output) + "\n")

def main():
    world_size = 2
    mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=True)
    set_start_method("spawn")  # Recommended for PyTorch multiprocessing
    world_size = torch.cuda.device_count()
    unprocessed_features = load_features(features_file, functions_file)
    output_queue = Queue()
    writer_process = Process(target=write_outputs_from_queue, args=(output_queue, functions_file))
    writer_process.start()
    features_with_ids = [{"id": feature["id"], "response": feature["response"]} for feature in unprocessed_features]
    # Ensure features list is properly divided among processes or handled per your logic
    # This example assumes a simplistic division, which may need adjustment
    mp.spawn(expand_qa, args=(features_with_ids, output_queue, world_size), nprocs=world_size, join=True)

    # Signal the writer process that processing is done
    output_queue.put("DONE")
    writer_process.join()


if __name__ == "__main__":
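The parallelism above splits work by assigning each feature to the rank equal to feature["id"] % world_size, while a dedicated writer process drains a shared Queue so only one process appends to functions.json. Below is a CPU-only sketch of that sharding-plus-writer pattern (process_one and the output file name are illustrative stand-ins, not part of this commit):

import json
from multiprocessing import Process, Queue, set_start_method

def process_one(feature):
    # Stand-in for the per-feature model call (generate_response in the commit).
    return f"processed: {feature['response']}"

def worker(rank, world_size, features, queue):
    for feature in features:
        if feature["id"] % world_size == rank:  # same modulo sharding as expand_qa
            queue.put({"id": feature["id"], "response": process_one(feature)})
    queue.put("DONE")  # one sentinel per worker

def writer(queue, path, world_size):
    done = 0
    with open(path, "a") as f:
        while done < world_size:  # stop once every worker has signalled
            item = queue.get()
            if item == "DONE":
                done += 1
                continue
            f.write(json.dumps(item) + "\n")

if __name__ == "__main__":
    set_start_method("spawn")
    world_size = 2
    features = [{"id": i, "response": f"feature {i}"} for i in range(6)]
    queue = Queue()
    writer_proc = Process(target=writer, args=(queue, "sharded_output.jsonl", world_size))
    writer_proc.start()
    workers = [Process(target=worker, args=(r, world_size, features, queue)) for r in range(world_size)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    writer_proc.join()

One difference from the commit: this sketch has every worker enqueue its own DONE sentinel and the writer count them, whereas the commit has rank 0 enqueue the sentinels and main() add a final one after mp.spawn returns.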
12 changes: 12 additions & 0 deletions vision_datasets/functions.json
@@ -19570,5 +19570,17 @@
"response": "The image shows a construction site where several workers are working. In the foreground, two workers are crouched down working on a square hole in the ground. Another worker is standing in the background, and two more are in the far background. In the middle ground, there is a long white building with Chinese characters on the second floor. The characters say \"\u5f3a\u5316\u5b89\u5168\u6587\u5316\". There is a red roof on the white building. On the right side of the image, there is a long sidewalk.",
"id": 3261,
"new_response": "[FUNCTION CALL]\n{{\n 'type': 'object',\n 'properties': {{\n 'scene_type': {{\n 'type': 'string',\n 'description': 'The type of scene in the image.',\n 'enum': ['construction_site']\n }},\n 'workers_count': {{\n 'type': 'integer',\n 'description': 'The number of workers in the image.',\n 'minimum': 2,\n 'maximum': 6\n }},\n 'workers_positions': {{\n 'type': 'array',\n 'description': 'The positions of the workers in the image.',\n 'items': {{\n 'type': 'string',\n 'enum': ['foreground', 'background', 'middle_ground', 'far_background']\n }}\n }},\n 'building_colors': {{\n 'type': 'array',\n 'description': 'The colors of the building in the image.',\n 'items': {{\n 'type': 'string',\n 'enum': ['white']\n }}\n }},\n 'building_features': {{\n 'type': 'object',\n 'properties': {{\n 'second_floor_text': {{\n 'type': 'string',\n 'description': 'The text on the second floor of the building.',\n 'enum': ['\u5f3a\u5316\u5b89\u5168\u6587\u5316']\n }},\n 'roof_color': {{\n 'type': 'string',\n 'description': 'The color of the roof of the building.',\n 'enum': ['red']\n }}\n }}\n }},\n 'sidewalk_presence': {{\n 'type': 'boolean',\n 'description': 'Whether there is a sidewalk in the image.',\n 'enum': [true]\n }}\n }}\n}}\n\n[EXPECTED OUTPUT]\n{{\n 'scene_type': 'construction_site',\n 'workers_count': 6,\n 'workers_positions': ['foreground', 'background', 'middle_ground', 'far_background'],\n 'building_colors': ['white'],\n 'building_features': {\n 'second_floor_text': '\u5f3a\u5316"
},
{
"image_path": "G:\\images\\combined\\005303_jpg.rf.54be7c16f2ddd8094e4a2fa3bb5e1c82.jpg",
"response": "The image shows a construction site where several workers are working. There are three workers in the foreground, all wearing blue shirts and pants, with the one on the left crouching down and the other two standing further back. They are working on a square hole in the ground. In the background, there are three other workers, one near the left side of the image, one in the center, and one on the right side. There is also a white and red sign on a building in the background.",
"id": 3262,
"new_response": "[FUNCTION CALL]\n{{\n 'type': 'object',\n 'properties': {{\n 'workers_colors': {{\n 'type': 'array',\n 'description': 'The colors of the workers in the image.',\n 'items': {{\n 'type':'string',\n 'enum': ['blue']\n }}\n }},\n 'workers_positions': {{\n 'type': 'array',\n 'description': 'The positions of the workers in the image.',\n 'items': {{\n 'type':'string',\n 'enum': ['foreground', 'background']\n }}\n }},\n 'workers_features': {{\n 'type':'string',\n 'description': 'The features seen on the construction site.',\n 'enum': ['square hole', 'white and red sign']\n }}\n }}\n}}\n\n[EXPECTED OUTPUT]\n{{\n 'workers_colors': ['blue'],\n 'workers_positions': ['foreground', 'background'],\n 'workers_features':'square hole'\n}}"
},
{
"image_path": "G:\\images\\combined\\005304_jpg.rf.503db9e2b279bb8ac286d27e61aac098.jpg",
"response": "A group of people standing next to each other wearing hard hats.",
"id": 3263,
"new_response": "[FUNCTION CALL]\n {{\n 'type': 'object',\n 'properties': {{\n 'people_count': {{\n 'type': 'number',\n 'description': 'The number of people in the image.',\n 'minimum': 1\n }},\n 'people_attire': {{\n 'type': 'array',\n 'description': 'The attire of the people in the image.',\n 'items': {{\n 'type':'string',\n 'enum': ['hard hats']\n }}\n }},\n 'people_location': {{\n 'type':'string',\n 'description': 'The location of the people in the image.',\n 'enum': ['standing']\n }}\n }}\n}}\n\n[EXPECTED OUTPUT]\n{{\n 'people_count': 1,\n 'people_attire': ['hard hats'],\n 'people_location':'standing'\n}}"
}
]
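Each new entry in functions.json pairs the caption-style response with a new_response string holding a [FUNCTION CALL] schema and an [EXPECTED OUTPUT] block; the doubled braces make those payloads template text rather than parseable JSON. A small sketch for splitting the two sections back out, assuming functions.json stays a single JSON array as shown here (illustrative, not part of the commit):

import json
import re

def split_new_response(new_response):
    """Return the [FUNCTION CALL] and [EXPECTED OUTPUT] sections as raw text."""
    call = re.search(r"\[FUNCTION CALL\]\s*(.*?)\s*\[EXPECTED OUTPUT\]", new_response, re.DOTALL)
    output = re.search(r"\[EXPECTED OUTPUT\]\s*(.*)", new_response, re.DOTALL)
    return (
        call.group(1) if call else None,
        output.group(1) if output else None,
    )

with open("functions.json", "r", encoding="utf-8") as f:
    entries = json.load(f)

for entry in entries[-3:]:  # the three entries added in this commit
    call_text, expected_text = split_new_response(entry["new_response"])
    print(entry["id"], "schema found:", call_text is not None, "output found:", expected_text is not None)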
4 changes: 4 additions & 0 deletions vision_datasets/requirements.txt
@@ -0,0 +1,4 @@
torch
zetascale
swarms
transformers