Skip to content

Commit

Permalink
Merge branch 'release/2.7.0' of https://github.com/phidatahq/phidata
Browse files Browse the repository at this point in the history
…into lumalabs-video-generation
  • Loading branch information
dirkvolter committed Dec 11, 2024
2 parents 795ee52 + 6a60da1 commit 2fd27e1
Show file tree
Hide file tree
Showing 17 changed files with 201 additions and 56 deletions.
2 changes: 1 addition & 1 deletion cookbook/agents/15_generate_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
tools=[ModelsLabs()],
description="You are an AI agent that can generate videos using the ModelsLabs API.",
instructions=[
"When the user asks you to create a video, use the `create_video` tool to create the video.",
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
"The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
"Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
],
Expand Down
4 changes: 2 additions & 2 deletions cookbook/agents/43_generate_replicate_video.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.replicate import ReplicateToolkit
from phi.tools.replicate import ReplicateTools

"""Create an agent specialized for Replicate AI content generation"""

video_agent = Agent(
name="Video Generator Agent",
model=OpenAIChat(id="gpt-4o"),
tools=[
ReplicateToolkit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405")
ReplicateTools(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405")
],
description="You are an AI agent that can generate videos using the Replicate API.",
instructions=[
Expand Down
10 changes: 4 additions & 6 deletions cookbook/agents/44_generate_replicate_image.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.replicate import ReplicateToolkit
from phi.tools.replicate import ReplicateTools

"""Create an agent specialized for Replicate AI content generation"""

video_agent = Agent(
image_agent = Agent(
name="Image Generator Agent",
model=OpenAIChat(id="gpt-4o"),
tools=[
ReplicateToolkit(model="luma/photon-flash")
],
tools=[ReplicateTools(model="luma/photon-flash")],
description="You are an AI agent that can generate images using the Replicate API.",
instructions=[
"When the user asks you to create an image, use the `generate_media` tool to create the image.",
Expand All @@ -21,4 +19,4 @@
show_tool_calls=True,
)

video_agent.print_response("Generate an image of a horse in the dessert.")
# Prompt typo fixed: "dessert" (the sweet course) -> "desert" (the landscape).
image_agent.print_response("Generate an image of a horse in the desert.")
20 changes: 20 additions & 0 deletions cookbook/agents/45_generate_fal_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.fal_tools import FalTools

# Example agent that generates videos through the Fal API.
fal_agent = Agent(
    name="Fal Video Generator Agent",
    model=OpenAIChat(id="gpt-4o"),
    # The model id must be passed by keyword: FalTools' first positional
    # parameter is `api_key`, so a positional model id would be stored as the
    # API key (and would silence the "FAL_KEY not set" warning).
    tools=[FalTools(model="fal-ai/hunyuan-video")],
    description="You are an AI agent that can generate videos using the Fal API.",
    instructions=[
        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
        "Return the URL as raw to the user.",
        "Don't convert video URL to markdown or anything else.",
    ],
    markdown=True,
    debug_mode=True,
    show_tool_calls=True,
)

fal_agent.print_response("Generate video of balloon in the ocean")
62 changes: 49 additions & 13 deletions cookbook/playground/multimodal_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,84 @@
from phi.model.openai import OpenAIChat
from phi.tools.dalle import Dalle
from phi.tools.models_labs import ModelsLabs
from phi.model.response import FileType
from phi.playground import Playground, serve_playground_app
from phi.storage.agent.sqlite import SqlAgentStorage
from phi.tools.fal_tools import FalTools
from pydantic import BaseModel, Field

image_agent_storage_file: str = "tmp/image_agent.db"

image_agent = Agent(
name="Image Agent",
name="DALL-E Image Agent",
agent_id="image_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[Dalle()],
description="You are an AI agent that can generate images using DALL-E.",
instructions=[
"When the user asks you to create an image, use the `create_image` tool to create the image.",
"The image will be displayed in the UI automatically below your response, so you don't need to show the image URL in your response.",
"Politely and courteously let the user know that the image has been generated and will be displayed below as soon as its ready.",
"Don't provide the URL of the image in the response. Only describe what image was generated."
],
markdown=True,
debug_mode=True,
add_history_to_messages=True,
add_datetime_to_instructions=True,
storage=SqlAgentStorage(table_name="image_agent", db_file="tmp/image_agent.db"),
storage=SqlAgentStorage(table_name="image_agent", db_file=image_agent_storage_file),
)

video_agent = Agent(
name="Video Agent",
agent_id="video_agent",
ml_gif_agent = Agent(
name="ModelsLab GIF Agent",
agent_id="ml_gif_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[ModelsLabs(wait_for_completion=True)],
tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.GIF)],
description="You are an AI agent that can generate gifs using the ModelsLabs API.",
instructions=[
"When the user asks you to create an image, use the `generate_media` tool to create the image.",
"Don't provide the URL of the image in the response. Only describe what image was generated."
],
markdown=True,
debug_mode=True,
add_history_to_messages=True,
add_datetime_to_instructions=True,
storage=SqlAgentStorage(table_name="ml_gif_agent", db_file=image_agent_storage_file),
)

ml_video_agent = Agent(
name="ModelsLab Video Agent",
agent_id="ml_video_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.MP4)],
description="You are an AI agent that can generate videos using the ModelsLabs API.",
instructions=[
"When the user asks you to create a video, use the `create_video` tool to create the video.",
"The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
"Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
"Don't provide the URL of the video in the response. Only describe what video was generated."
],
markdown=True,
debug_mode=True,
add_history_to_messages=True,
add_datetime_to_instructions=True,
storage=SqlAgentStorage(table_name="video_agent", db_file="tmp/video_agent.db"),
storage=SqlAgentStorage(table_name="ml_video_agent", db_file=image_agent_storage_file),
)

app = Playground(agents=[image_agent, video_agent]).get_app()
# Playground agent that generates videos through the Fal API.
fal_agent = Agent(
    name="Fal Video Agent",
    agent_id="fal_agent",
    model=OpenAIChat(id="gpt-4o"),
    # The model id must be passed by keyword: FalTools' first positional
    # parameter is `api_key`, so a positional model id would be stored as the
    # API key instead of selecting the model.
    tools=[FalTools(model="fal-ai/hunyuan-video")],
    description="You are an AI agent that can generate videos using the Fal API.",
    instructions=[
        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
        "Don't provide the URL of the video in the response. Only describe what video was generated."
    ],
    markdown=True,
    debug_mode=True,
    add_history_to_messages=True,
    add_datetime_to_instructions=True,
    # Uses the SQLite file shared by the other agents in this module, with its own table.
    storage=SqlAgentStorage(table_name="fal_agent", db_file=image_agent_storage_file),
)


app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app(use_async=False)

if __name__ == "__main__":
serve_playground_app("multimodal_agent:app", reload=True)
1 change: 0 additions & 1 deletion phi/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,6 @@ def get_session_data(self) -> Dict[str, Any]:

def get_agent_session(self) -> AgentSession:
"""Get an AgentSession object, which can be saved to the database"""

return AgentSession(
session_id=self.session_id,
agent_id=self.agent_id,
Expand Down
4 changes: 2 additions & 2 deletions phi/llm/google/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ def api_kwargs(self) -> Dict[str, Any]:
return kwargs

def invoke(self, messages: List[Message]):
return self.client.generate_media(contents=self.conform_messages_to_gemini(messages))
return self.client.generate_content(contents=self.conform_messages_to_gemini(messages))

def invoke_stream(self, messages: List[Message]):
yield from self.client.generate_media(
yield from self.client.generate_content(
contents=self.conform_messages_to_gemini(messages),
stream=True,
)
Expand Down
2 changes: 1 addition & 1 deletion phi/llm/openai/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty:
_dict["presence_penalty"] = self.presence_penalty
if self.response_format:
_dict["response_format"] = self.response_format
_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
if self.seed is not None:
_dict["seed"] = self.seed
if self.stop:
Expand Down
4 changes: 2 additions & 2 deletions phi/llm/vertexai/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ def convert_messages_to_contents(self, messages: List[Message]) -> List[Any]:
return _contents

def invoke(self, messages: List[Message]) -> GenerationResponse:
return self.client.generate_media(contents=self.convert_messages_to_contents(messages))
return self.client.generate_content(contents=self.convert_messages_to_contents(messages))

def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]:
yield from self.client.generate_media(
yield from self.client.generate_content(
contents=self.convert_messages_to_contents(messages),
stream=True,
)
Expand Down
4 changes: 2 additions & 2 deletions phi/model/google/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def invoke(self, messages: List[Message]):
Returns:
GenerateContentResponse: The response from the model.
"""
return self.get_client().generate_media(contents=self._format_messages(messages))
return self.get_client().generate_content(contents=self._format_messages(messages))

def invoke_stream(self, messages: List[Message]):
"""
Expand All @@ -279,7 +279,7 @@ def invoke_stream(self, messages: List[Message]):
Returns:
Iterator[GenerateContentResponse]: The response from the model as a stream.
"""
yield from self.get_client().generate_media(
yield from self.get_client().generate_content(
contents=self._format_messages(messages),
stream=True,
)
Expand Down
2 changes: 1 addition & 1 deletion phi/model/openai/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty is not None:
model_dict["presence_penalty"] = self.presence_penalty
if self.response_format is not None:
model_dict["response_format"] = self.response_format
model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
if self.seed is not None:
model_dict["seed"] = self.seed
if self.stop is not None:
Expand Down
5 changes: 5 additions & 0 deletions phi/model/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ class ModelResponse:
tool_call: Optional[Dict[str, Any]] = None
event: str = ModelResponseEvent.assistant_response.value
created_at: int = int(time())


class FileType(str, Enum):
    """String-valued enumeration of generated-media file types.

    Members also subclass `str`, so each one compares equal to its plain
    string value (e.g. ``FileType.MP4 == "mp4"``).
    """

    # Each value is the lowercase name of the format.
    MP4 = "mp4"
    GIF = "gif"
4 changes: 2 additions & 2 deletions phi/model/vertexai/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def invoke(self, messages: List[Message]) -> GenerationResponse:
Returns:
GenerationResponse object containing the response content
"""
return self.get_client().generate_media(contents=self._format_messages(messages))
return self.get_client().generate_content(contents=self._format_messages(messages))

def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]:
"""
Expand All @@ -270,7 +270,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]
Returns:
Iterator[GenerationResponse] object containing the response content
"""
yield from self.get_client().generate_media(
yield from self.get_client().generate_content(
contents=self._format_messages(messages),
stream=True,
)
Expand Down
88 changes: 88 additions & 0 deletions phi/tools/fal_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
pip install fal-client
"""

from os import getenv
from typing import Optional

from phi.agent import Agent
from phi.tools import Toolkit
from phi.utils.log import logger
from phi.model.content import Video, Image
from uuid import uuid4


try:
import fal_client # type: ignore
except ImportError:
raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`")


class FalTools(Toolkit):
    """Toolkit that runs a Fal-hosted model and attaches the resulting media to the agent."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "fal-ai/hunyuan-video",
    ):
        """Create the toolkit and register `generate_media` as its tool.

        Args:
            api_key: Fal API key. When omitted, the FAL_KEY environment
                variable is used; an error is logged if neither is set.
            model: Fal application id to run for every `generate_media` call.
        """
        super().__init__(name="fal")

        self.api_key = api_key or getenv("FAL_KEY")
        self.model = model
        if not self.api_key:
            logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.")
        # Progress messages already logged for the current request.
        self.seen_logs: set[str] = set()
        self.register(self.generate_media)

    def on_queue_update(self, update):
        """Log each not-yet-seen progress message of an in-progress Fal request.

        Args:
            update: Queue status object delivered by `fal_client` while the
                request is running. Anything other than an `InProgress`
                update carrying logs is ignored.
        """
        if not (isinstance(update, fal_client.InProgress) and update.logs):
            return
        for log in update.logs:
            message = log["message"]
            if message not in self.seen_logs:
                logger.info(message)
                self.seen_logs.add(message)

    def generate_media(self, agent: Agent, prompt: str) -> str:
        """
        Use this function to run a model with a given prompt.
        Args:
            prompt (str): A text description of the task.
        Returns:
            str: Return the result of the model.
        """
        # NOTE(review): the docstring above is kept verbatim on purpose; it is
        # presumably surfaced to the model as the tool description -- confirm
        # before rewording it.

        # De-duplicate progress logs per request: without this reset, a message
        # logged by an earlier request would be suppressed for every later one.
        self.seen_logs.clear()
        try:
            # Use a client bound to the resolved key so an `api_key` passed to
            # the constructor is honoured; with key=None the client falls back
            # to the environment, matching the module-level `subscribe`.
            client = fal_client.SyncClient(key=self.api_key)
            result = client.subscribe(
                self.model,
                arguments={"prompt": prompt},
                with_logs=True,
                on_queue_update=self.on_queue_update,
            )

            if "image" in result:
                media_type = "image"
            elif "video" in result:
                media_type = "video"
            else:
                logger.error(f"Unsupported type in result: {result}")
                return f"Unsupported type in result: {result}"

            # Tolerate an explicit null payload and refuse to register media
            # that has no URL -- previously an empty URL was reported as success.
            url = (result.get(media_type) or {}).get("url", "")
            if not url:
                logger.error(f"No {media_type} URL in result: {result}")
                return f"Error: no {media_type} URL in result: {result}"

            media_id = str(uuid4())
            if media_type == "image":
                agent.add_image(Image(id=media_id, url=url))
            else:
                agent.add_video(Video(id=media_id, url=url))

            return f"{media_type.capitalize()} generated successfully at {url}"
        except Exception as e:
            # Tool boundary: report the failure to the caller as text instead
            # of letting it propagate into the agent run.
            logger.error(f"Failed to run model: {e}")
            return f"Error: {e}"
Loading

0 comments on commit 2fd27e1

Please sign in to comment.