Merge branch 'release/2.7.0' of https://github.com/phidatahq/phidata …

…into lumalabs-video-generation
agno-agi · Dec 11, 2024 · 2fd27e1 · 2fd27e1
2 parents 795ee52 + 6a60da1
commit 2fd27e1
Show file tree

Hide file tree

Showing 17 changed files with 201 additions and 56 deletions.
diff --git a/cookbook/agents/15_generate_video.py b/cookbook/agents/15_generate_video.py
@@ -7,7 +7,7 @@
     tools=[ModelsLabs()],
     description="You are an AI agent that can generate videos using the ModelsLabs API.",
     instructions=[
-        "When the user asks you to create a video, use the `create_video` tool to create the video.",
+        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
         "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
         "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
     ],

diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py
@@ -1,14 +1,14 @@
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
-from phi.tools.replicate import ReplicateToolkit
+from phi.tools.replicate import ReplicateTools
 
 """Create an agent specialized for Replicate AI content generation"""
 
 video_agent = Agent(
     name="Video Generator Agent",
     model=OpenAIChat(id="gpt-4o"),
     tools=[
-        ReplicateToolkit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405")
+        ReplicateTools(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405")
     ],
     description="You are an AI agent that can generate videos using the Replicate API.",
     instructions=[

diff --git a/cookbook/agents/44_generate_replicate_image.py b/cookbook/agents/44_generate_replicate_image.py
@@ -1,15 +1,13 @@
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
-from phi.tools.replicate import ReplicateToolkit
+from phi.tools.replicate import ReplicateTools
 
 """Create an agent specialized for Replicate AI content generation"""
 
-video_agent = Agent(
+image_agent = Agent(
     name="Image Generator Agent",
     model=OpenAIChat(id="gpt-4o"),
-    tools=[
-        ReplicateToolkit(model="luma/photon-flash")
-    ],
+    tools=[ReplicateTools(model="luma/photon-flash")],
     description="You are an AI agent that can generate images using the Replicate API.",
     instructions=[
         "When the user asks you to create an image, use the `generate_media` tool to create the image.",
@@ -21,4 +19,4 @@
     show_tool_calls=True,
 )
 
-video_agent.print_response("Generate an image of a horse in the dessert.")
+image_agent.print_response("Generate an image of a horse in the desert.")
diff --git a/cookbook/agents/45_generate_fal_video.py b/cookbook/agents/45_generate_fal_video.py
@@ -0,0 +1,20 @@
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.fal_tools import FalTools
+
+fal_agent = Agent(
+    name="Fal Video Generator Agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools(model="fal-ai/hunyuan-video")],  # keyword required: FalTools' first positional parameter is api_key
+    description="You are an AI agent that can generate videos using the Fal API.",
+    instructions=[
+        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
+        "Return the URL as raw to the user.",
+        "Don't convert video URL to markdown or anything else.",
+    ],
+    markdown=True,
+    debug_mode=True,
+    show_tool_calls=True,
+)
+
+fal_agent.print_response("Generate video of balloon in the ocean")
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
@@ -10,48 +10,84 @@
 from phi.model.openai import OpenAIChat
 from phi.tools.dalle import Dalle
 from phi.tools.models_labs import ModelsLabs
+from phi.model.response import FileType
 from phi.playground import Playground, serve_playground_app
 from phi.storage.agent.sqlite import SqlAgentStorage
+from phi.tools.fal_tools import FalTools
+from pydantic import BaseModel, Field
 
 image_agent_storage_file: str = "tmp/image_agent.db"
 
 image_agent = Agent(
-    name="Image Agent",
+    name="DALL-E Image Agent",
     agent_id="image_agent",
     model=OpenAIChat(id="gpt-4o"),
     tools=[Dalle()],
     description="You are an AI agent that can generate images using DALL-E.",
     instructions=[
         "When the user asks you to create an image, use the `create_image` tool to create the image.",
-        "The image will be displayed in the UI automatically below your response, so you don't need to show the image URL in your response.",
-        "Politely and courteously let the user know that the image has been generated and will be displayed below as soon as its ready.",
+        "Don't provide the URL of the image in the response. Only describe what image was generated."
     ],
     markdown=True,
     debug_mode=True,
     add_history_to_messages=True,
     add_datetime_to_instructions=True,
-    storage=SqlAgentStorage(table_name="image_agent", db_file="tmp/image_agent.db"),
+    storage=SqlAgentStorage(table_name="image_agent", db_file=image_agent_storage_file),
 )
 
-video_agent = Agent(
-    name="Video Agent",
-    agent_id="video_agent",
+ml_gif_agent = Agent(
+    name="ModelsLab GIF Agent",
+    agent_id="ml_gif_agent",
     model=OpenAIChat(id="gpt-4o"),
-    tools=[ModelsLabs(wait_for_completion=True)],
+    tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.GIF)],
+    description="You are an AI agent that can generate gifs using the ModelsLabs API.",
+    instructions=[
+        "When the user asks you to create an image, use the `generate_media` tool to create the image.",
+        "Don't provide the URL of the image in the response. Only describe what image was generated."
+    ],
+    markdown=True,
+    debug_mode=True,
+    add_history_to_messages=True,
+    add_datetime_to_instructions=True,
+    storage=SqlAgentStorage(table_name="ml_gif_agent", db_file=image_agent_storage_file),
+)
+
+ml_video_agent = Agent(
+    name="ModelsLab Video Agent",
+    agent_id="ml_video_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.MP4)],
     description="You are an AI agent that can generate videos using the ModelsLabs API.",
     instructions=[
-        "When the user asks you to create a video, use the `create_video` tool to create the video.",
-        "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
-        "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
+        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
+        "Don't provide the URL of the video in the response. Only describe what video was generated."
     ],
     markdown=True,
     debug_mode=True,
     add_history_to_messages=True,
     add_datetime_to_instructions=True,
-    storage=SqlAgentStorage(table_name="video_agent", db_file="tmp/video_agent.db"),
+    storage=SqlAgentStorage(table_name="ml_video_agent", db_file=image_agent_storage_file),
 )
 
-app = Playground(agents=[image_agent, video_agent]).get_app()
+fal_agent = Agent(
+    name="Fal Video Agent",
+    agent_id="fal_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools(model="fal-ai/hunyuan-video")],  # keyword required: FalTools' first positional parameter is api_key
+    description="You are an AI agent that can generate videos using the Fal API.",
+    instructions=[
+        "When the user asks you to create a video, use the `generate_media` tool to create the video.",
+        "Don't provide the URL of the video in the response. Only describe what video was generated."
+    ],
+    markdown=True,
+    debug_mode=True,
+    add_history_to_messages=True,
+    add_datetime_to_instructions=True,
+    storage=SqlAgentStorage(table_name="fal_agent", db_file=image_agent_storage_file),
+)
+
+
+app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app(use_async=False)
 
 if __name__ == "__main__":
     serve_playground_app("multimodal_agent:app", reload=True)
diff --git a/phi/agent/agent.py b/phi/agent/agent.py
@@ -589,7 +589,6 @@ def get_session_data(self) -> Dict[str, Any]:
 
     def get_agent_session(self) -> AgentSession:
         """Get an AgentSession object, which can be saved to the database"""
-
         return AgentSession(
             session_id=self.session_id,
             agent_id=self.agent_id,

diff --git a/phi/llm/google/gemini.py b/phi/llm/google/gemini.py
@@ -132,10 +132,10 @@ def api_kwargs(self) -> Dict[str, Any]:
         return kwargs
 
     def invoke(self, messages: List[Message]):
-        return self.client.generate_media(contents=self.conform_messages_to_gemini(messages))
+        return self.client.generate_content(contents=self.conform_messages_to_gemini(messages))
 
     def invoke_stream(self, messages: List[Message]):
-        yield from self.client.generate_media(
+        yield from self.client.generate_content(
             contents=self.conform_messages_to_gemini(messages),
             stream=True,
         )

diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py
@@ -181,7 +181,7 @@ def to_dict(self) -> Dict[str, Any]:
         if self.presence_penalty:
             _dict["presence_penalty"] = self.presence_penalty
         if self.response_format:
-            _dict["response_format"] = self.response_format
+            _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
         if self.seed is not None:
             _dict["seed"] = self.seed
         if self.stop:

diff --git a/phi/llm/vertexai/gemini.py b/phi/llm/vertexai/gemini.py
@@ -130,10 +130,10 @@ def convert_messages_to_contents(self, messages: List[Message]) -> List[Any]:
         return _contents
 
     def invoke(self, messages: List[Message]) -> GenerationResponse:
-        return self.client.generate_media(contents=self.convert_messages_to_contents(messages))
+        return self.client.generate_content(contents=self.convert_messages_to_contents(messages))
 
     def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]:
-        yield from self.client.generate_media(
+        yield from self.client.generate_content(
             contents=self.convert_messages_to_contents(messages),
             stream=True,
         )

diff --git a/phi/model/google/gemini.py b/phi/model/google/gemini.py
@@ -267,7 +267,7 @@ def invoke(self, messages: List[Message]):
         Returns:
             GenerateContentResponse: The response from the model.
         """
-        return self.get_client().generate_media(contents=self._format_messages(messages))
+        return self.get_client().generate_content(contents=self._format_messages(messages))
 
     def invoke_stream(self, messages: List[Message]):
         """
@@ -279,7 +279,7 @@ def invoke_stream(self, messages: List[Message]):
         Returns:
             Iterator[GenerateContentResponse]: The response from the model as a stream.
         """
-        yield from self.get_client().generate_media(
+        yield from self.get_client().generate_content(
             contents=self._format_messages(messages),
             stream=True,
         )

diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py
@@ -255,7 +255,7 @@ def to_dict(self) -> Dict[str, Any]:
         if self.presence_penalty is not None:
             model_dict["presence_penalty"] = self.presence_penalty
         if self.response_format is not None:
-            model_dict["response_format"] = self.response_format
+            model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
         if self.seed is not None:
             model_dict["seed"] = self.seed
         if self.stop is not None:

diff --git a/phi/model/response.py b/phi/model/response.py
@@ -23,3 +23,8 @@ class ModelResponse:
     tool_call: Optional[Dict[str, Any]] = None
     event: str = ModelResponseEvent.assistant_response.value
     created_at: int = int(time())
+
+
+class FileType(str, Enum):  # closed set of media file formats; str mixin so members compare equal to "mp4"/"gif"
+    MP4 = "mp4"
+    GIF = "gif"
diff --git a/phi/model/vertexai/gemini.py b/phi/model/vertexai/gemini.py
@@ -258,7 +258,7 @@ def invoke(self, messages: List[Message]) -> GenerationResponse:
         Returns:
             GenerationResponse object containing the response content
         """
-        return self.get_client().generate_media(contents=self._format_messages(messages))
+        return self.get_client().generate_content(contents=self._format_messages(messages))
 
     def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]:
         """
@@ -270,7 +270,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]
         Returns:
             Iterator[GenerationResponse] object containing the response content
         """
-        yield from self.get_client().generate_media(
+        yield from self.get_client().generate_content(
             contents=self._format_messages(messages),
             stream=True,
         )

diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py
@@ -0,0 +1,88 @@
+"""
+pip install fal-client
+"""
+
+from os import getenv
+from typing import Optional
+
+from phi.agent import Agent
+from phi.tools import Toolkit
+from phi.utils.log import logger
+from phi.model.content import Video, Image
+from uuid import uuid4
+
+
+try:
+    import fal_client  # type: ignore
+except ImportError:
+    raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`")
+
+
+class FalTools(Toolkit):  # Toolkit that registers one tool, generate_media, backed by fal_client
+    def __init__(
+        self,
+        api_key: Optional[str] = None,  # first positional parameter; falls back to the FAL_KEY environment variable
+        model: str = "fal-ai/hunyuan-video",  # model id handed to fal_client.subscribe on every call
+    ):
+        super().__init__(name="fal")
+
+        self.api_key = api_key or getenv("FAL_KEY")  # NOTE(review): stored but never passed to fal_client in this class - presumably fal_client reads FAL_KEY itself; confirm
+        self.model = model
+        if not self.api_key:
+            logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.")  # only logs; construction still succeeds without a key
+        self.seen_logs: set[str] = set()  # messages already logged; kept for the lifetime of this instance, across calls
+        self.register(self.generate_media)
+
+    def on_queue_update(self, update):  # callback given to fal_client.subscribe; logs each new in-progress message once
+        if isinstance(update, fal_client.InProgress) and update.logs:
+            for log in update.logs:
+                message = log["message"]  # assumes every log entry is a mapping with a "message" key - TODO confirm
+                if message not in self.seen_logs:
+                    logger.info(message)
+                    self.seen_logs.add(message)
+
+    def generate_media(self, agent: Agent, prompt: str) -> str:
+        """
+        Use this function to run a model with a given prompt.
+
+        Args:
+            prompt (str): A text description of the task.
+        Returns:
+            str: Return the result of the model.
+        """
+        try:  # any exception below is logged and returned as an "Error: ..." string instead of being raised
+            result = fal_client.subscribe(
+                self.model,
+                arguments={"prompt": prompt},
+                with_logs=True,
+                on_queue_update=self.on_queue_update,
+            )
+
+            media_id = str(uuid4())  # fresh id attached to whichever media object is added to the agent
+
+            if "image" in result:  # "image" is checked first, so a result holding both keys is treated as an image
+                url = result.get("image", {}).get("url", "")  # url becomes "" when the entry has no "url" key
+                agent.add_image(
+                    Image(
+                        id=media_id,
+                        url=url,
+                    )
+                )
+                media_type = "image"
+            elif "video" in result:
+                url = result.get("video", {}).get("url", "")
+                agent.add_video(
+                    Video(
+                        id=media_id,
+                        url=url,
+                    )
+                )
+                media_type = "video"
+            else:  # neither key present: log and return the raw result text, nothing is added to the agent
+                logger.error(f"Unsupported type in result: {result}")
+                return f"Unsupported type in result: {result}"
+
+            return f"{media_type.capitalize()} generated successfully at {url}"
+        except Exception as e:
+            logger.error(f"Failed to run model: {e}")
+            return f"Error: {e}"