You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
from pprint import pprint
from mlx_vlm import load
from mlx_vlm.video_generate import generate
from qwen_vl_utils import process_vision_info
import mlx.core as mx
# ---------------------------------------------------------------------------
# Video question-answering demo: load a vision-language model, feed it one
# video plus a text prompt, and pretty-print whatever the generator returns.
# ---------------------------------------------------------------------------

# Model checkpoint and its matching processor.
model_path = "mlx-community/Qwen2-VL-2B-Instruct-4bit"
model, processor = load(model_path)

# What to look at and what to ask about it.
video_path = "test.mp4"
prompt = "Describe this video."

# A single user turn made of two content parts: the video and the question.
video_entry = {
    "type": "video",
    "video": video_path,
    "max_pixels": 360 * 360,  # pixel budget the video frames are resized to
    "fps": 1.0,  # sample one frame per second
}
text_entry = {"type": "text", "text": prompt}
messages = [{"role": "user", "content": [video_entry, text_entry]}]

# Render the conversation to a prompt string (no tokenization yet), ending
# with the generation prompt.
text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Pull the image/video payloads out of the message structure.
image_inputs, video_inputs = process_vision_info(messages)

# Run the processor over the text and the extracted vision inputs.
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,  # frames extracted by process_vision_info
    padding=True,
    return_tensors="pt",
)

# Wrap each processor output needed for generation in an MLX array.
input_ids, pixel_values, mask, image_grid_thw = (
    mx.array(inputs[field])
    for field in (
        "input_ids",
        "pixel_values_videos",  # processed video frames
        "attention_mask",
        "video_grid_thw",  # video grid dimensions
    )
)

# Video-specific extras forwarded to the generator as keyword arguments.
kwargs = dict(image_grid_thw=image_grid_thw)

# `generate` is the name imported from mlx_vlm.video_generate at the top of
# this file.
response = generate(
    model=model,
    processor=processor,
    input_ids=input_ids,
    pixel_values=pixel_values,
    mask=mask,
    temp=0.7,
    max_tokens=100,
    **kwargs,
)

# Show the result.
pprint(response)
Traceback (most recent call last):
File "/Users/zwanderer/Desktop/2024/vlm/mlx-vlm/mlx_vlm/test.py", line 3, in <module>
from mlx_vlm.video_generate import generate
ModuleNotFoundError: No module named 'mlx_vlm.video_generate'
The text was updated successfully, but these errors were encountered:
I am trying to run video inference on an MP4 file, but the script fails at import time with the following error:
Traceback (most recent call last):
File "/Users/zwanderer/Desktop/2024/vlm/mlx-vlm/mlx_vlm/test.py", line 3, in <module>
from mlx_vlm.video_generate import generate
ModuleNotFoundError: No module named 'mlx_vlm.video_generate'
The text was updated successfully, but these errors were encountered: