INFO 08-07 06:33:47 async_llm_engine.py:174] Added request chat-c3004d9e0b7346f898079a1c1a5a217c.
ERROR 08-07 06:33:47 async_llm_engine.py:57] Engine background task failed
ERROR 08-07 06:33:47 async_llm_engine.py:57] Traceback (most recent call last):
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 47, in _log_task_completion
ERROR 08-07 06:33:47 async_llm_engine.py:57] return_value = task.result()
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 642, in run_engine_loop
ERROR 08-07 06:33:47 async_llm_engine.py:57] result = task.result()
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 585, in engine_step
ERROR 08-07 06:33:47 async_llm_engine.py:57] request_outputs = await self.engine.step_async(virtual_engine)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 254, in step_async
ERROR 08-07 06:33:47 async_llm_engine.py:57] output = await self.model_executor.execute_model_async(
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/executor/gpu_executor.py", line 159, in execute_model_async
ERROR 08-07 06:33:47 async_llm_engine.py:57] output = await make_async(self.driver_worker.execute_model
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
ERROR 08-07 06:33:47 async_llm_engine.py:57] result = self.fn(*self.args, **self.kwargs)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py", line 273, in execute_model
ERROR 08-07 06:33:47 async_llm_engine.py:57] output = self.model_runner.execute_model(
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 08-07 06:33:47 async_llm_engine.py:57] return func(*args, **kwargs)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 1363, in execute_model
ERROR 08-07 06:33:47 async_llm_engine.py:57] hidden_or_intermediate_states = model_executable(
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 08-07 06:33:47 async_llm_engine.py:57] return self._call_impl(*args, **kwargs)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 08-07 06:33:47 async_llm_engine.py:57] return forward_call(*args, **kwargs)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 622, in forward
ERROR 08-07 06:33:47 async_llm_engine.py:57] image_inputs = self._parse_and_validate_inputs(input_ids, **kwargs)
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 608, in _parse_and_validate_inputs
ERROR 08-07 06:33:47 async_llm_engine.py:57] image_bounds=self._get_image_bounds(input_ids),
ERROR 08-07 06:33:47 async_llm_engine.py:57] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 566, in _get_image_bounds
ERROR 08-07 06:33:47 async_llm_engine.py:57] return torch.hstack([
ERROR 08-07 06:33:47 async_llm_engine.py:57] RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2 but got size 3 for tensor number 1 in the list.
Exception in callback _log_task_completion(error_callback=<bound method...7f97334c4a30>>)(<Task finishe...n the list.')>) at /usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py:37
handle: <Handle _log_task_completion(error_callback=<bound method...7f97334c4a30>>)(<Task finishe...n the list.')>) at /usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py:37>
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 47, in _log_task_completion
return_value = task.result()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 642, in run_engine_loop
result = task.result()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 585, in engine_step
request_outputs = await self.engine.step_async(virtual_engine)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 254, in step_async
output = await self.model_executor.execute_model_async(
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/gpu_executor.py", line 159, in execute_model_async
output = await make_async(self.driver_worker.execute_model
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py", line 273, in execute_model
output = self.model_runner.execute_model(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 1363, in execute_model
hidden_or_intermediate_states = model_executable(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 622, in forward
image_inputs = self._parse_and_validate_inputs(input_ids, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 608, in _parse_and_validate_inputs
image_bounds=self._get_image_bounds(input_ids),
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/minicpmv.py", line 566, in _get_image_bounds
return torch.hstack([
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2 but got size 3 for tensor number 1 in the list.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 59, in _log_task_completion
raise AsyncEngineDeadError(
vllm.engine.async_llm_engine.AsyncEngineDeadError: Task finished unexpectedly. This should never happen! Please open an issue on Github. See stack trace above for the actual cause.
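For reference, the RuntimeError above comes from torch.hstack, which concatenates along dimension 1 and requires every other dimension to match. A standalone two-liner (plain PyTorch, not vLLM code) reproduces the exact message when the stacked columns have different lengths:

import torch
# Column vectors of length 2 and 3: dim 0 disagrees, so hstack fails with
# "Sizes of tensors must match except in dimension 1. Expected size 2 but
# got size 3 for tensor number 1 in the list."
torch.hstack([torch.zeros(2, 1), torch.zeros(3, 1)])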
INFO 08-07 06:33:47 async_llm_engine.py:181] Aborted request chat-c3004d9e0b7346f898079a1c1a5a217c.
INFO: 127.0.0.1:19320 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 399, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
return await self.app(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/applications.py", line 123, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/exceptions.py", line 65, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 756, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/usr/local/lib/python3.10/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/lib/python3.10/dist-packages/starlette/routing.py", line 72, in app
response = await func(request)
File "/usr/local/lib/python3.10/dist-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
File "/usr/local/lib/python3.10/dist-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 189, in create_chat_completion
generator = await openai_serving_chat.create_chat_completion(
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/serving_chat.py", line 185, in create_chat_completion
return await self.chat_completion_full_generator(
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/serving_chat.py", line 436, in chat_completion_full_generator
async for res in result_generator:
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/rpc/client.py", line 216, in generate
raise request_output
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2 but got size 3 for tensor number 1 in the list.
WARNING 08-07 06:33:48 chat_utils.py:163] Detected image token string in the text prompt. Skipping prompt formatting.
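The final WARNING looks like the trigger: the prompt already contained the literal image token string, so prompt formatting was skipped, and the counts of image-start and image-end markers in input_ids can end up unbalanced. _get_image_bounds then hstacks a column of start positions against a column of end positions, and the mismatched counts (2 vs 3) raise the RuntimeError. A simplified sketch of that kind of logic (an illustration only, not the actual vLLM implementation; im_start_id and im_end_id are placeholder token ids):

import torch

def get_image_bounds(input_ids, im_start_id, im_end_id):
    # Positions of image-start / image-end tokens in the prompt.
    starts = torch.where(input_ids == im_start_id)[0]
    ends = torch.where(input_ids == im_end_id)[0]
    # hstack assumes one end marker per start marker; a stray image
    # placeholder in the raw prompt breaks that pairing, so the two
    # columns differ in length and hstack raises the size-mismatch error.
    return torch.hstack([starts.unsqueeze(-1), ends.unsqueeze(-1)])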