error video inference #122

zwanderer0 · 2024-11-23T23:20:14Z

I am trying to run video inference on an MP4 file. The script below fails with the traceback that follows it:

from pprint import pprint
from mlx_vlm import load
from mlx_vlm.video_generate import generate
from qwen_vl_utils import process_vision_info
import mlx.core as mx

# Reproduction script: load a vision-language model, feed it one video plus a
# text prompt, and pretty-print whatever generate() returns.

# Model identifier handed to mlx_vlm.load, which returns the model/processor pair.
model_path = "mlx-community/Qwen2-VL-2B-Instruct-4bit"
model, processor = load(model_path)

# Input clip and the instruction that accompanies it.
video_path = "test.mp4"
prompt = "Describe this video."

# A single user turn made of one video part followed by one text part.
video_part = {
    "type": "video",
    "video": video_path,
    # Per-frame pixel budget and sampling rate; presumably consumed by
    # qwen_vl_utils when it decodes the clip -- confirm against its docs.
    "max_pixels": 360 * 360,
    "fps": 1.0,
}
text_part = {"type": "text", "text": prompt}
messages = [{"role": "user", "content": [video_part, text_part]}]

# Render the conversation to a prompt string (not token ids), with the
# generation prompt appended.
text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Split the vision payloads referenced by the messages into images and videos.
image_inputs, video_inputs = process_vision_info(messages)

# Run the processor over the prompt text together with the vision inputs.
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)

# Convert the four processor outputs the generator needs into MLX arrays,
# in the same order the values are later consumed.
input_ids, pixel_values, mask, image_grid_thw = (
    mx.array(inputs[field])
    for field in (
        "input_ids",
        "pixel_values_videos",
        "attention_mask",
        "video_grid_thw",
    )
)

# Extra keyword arguments forwarded to generate(): the video grid dimensions
# are passed under the "image_grid_thw" name.
kwargs = {"image_grid_thw": image_grid_thw}

# Fixed generation arguments, gathered in one place before the call.
generation_args = {
    "model": model,
    "processor": processor,
    "input_ids": input_ids,
    "pixel_values": pixel_values,
    "mask": mask,
    "temp": 0.7,
    "max_tokens": 100,
}
response = generate(**generation_args, **kwargs)

# Show the result.
pprint(response)

Traceback (most recent call last):
File "/Users/zwanderer/Desktop/2024/vlm/mlx-vlm/mlx_vlm/test.py", line 3, in <module>
from mlx_vlm.video_generate import generate
ModuleNotFoundError: No module named 'mlx_vlm.video_generate'

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

error video inference #122

error video inference #122

zwanderer0 commented Nov 23, 2024 •

edited

Loading

error video inference #122

error video inference #122

Comments

zwanderer0 commented Nov 23, 2024 • edited Loading

zwanderer0 commented Nov 23, 2024 •

edited

Loading