From 0a739bc707b6d4a8a71b946e529b8ab004135773 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 01:59:23 +0530 Subject: [PATCH 01/16] luma lab video generation --- cookbook/tools/lumaAgent.py | 37 +++++++++++++ phi/tools/lumalab.py | 103 ++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 cookbook/tools/lumaAgent.py create mode 100644 phi/tools/lumalab.py diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py new file mode 100644 index 0000000000..a09d7c22fb --- /dev/null +++ b/cookbook/tools/lumaAgent.py @@ -0,0 +1,37 @@ +from phi.agent import Agent +from phi.llm.openai import OpenAIChat +from phi.tools.lumalab import LumaLab + +"""Create an agent specialized for Luma AI video generation""" + +luma_agent = Agent( + name="Luma Video Agent", + agent_id="luma-video-agent", + llm=OpenAIChat(model="gpt-4o"), + tools=[LumaLab()], # Using the LumaLab tool we created + markdown=True, + debug_mode=True, + show_tool_calls=True, + instructions=[ + "You are an agent designed to generate videos using the Luma AI API.", + "When asked to generate a video, use the generate_video function from the LumaLab toolkit.", + "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.", + "The default parameters are:", + "- loop: False", + "- aspect_ratio: '16:9'", + "- keyframes: None", + "After generating the video, display the video URL in markdown format.", + "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.", + "If any errors occur during generation, clearly communicate them to the user.", + ], + system_message=( + "Do not modify any default parameters of the generate_video function " + "unless explicitly specified in the user's request. Always provide clear " + "feedback about the video generation status." + ), +) + +luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves") + + + diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py new file mode 100644 index 0000000000..7912a1ebc0 --- /dev/null +++ b/phi/tools/lumalab.py @@ -0,0 +1,103 @@ +import time +import json +from os import getenv +from typing import Optional, Dict, Any + +from phi.agent import Agent +from phi.tools import Toolkit +from phi.utils.log import logger + +try: + from lumaai import LumaAI +except ImportError: + raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`") + + +class LumaLab(Toolkit): + def __init__( + self, + api_key: Optional[str] = None, + wait_for_completion: bool = True, + poll_interval: int = 3, + max_wait_time: int = 300, # 5 minutes + ): + super().__init__(name="luma_lab") + + self.wait_for_completion = wait_for_completion + self.poll_interval = poll_interval + self.max_wait_time = max_wait_time + self.api_key = api_key or getenv("LUMAAI_API_KEY") + + if not self.api_key: + logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.") + + self.client = LumaAI(auth_token=self.api_key) + self.register(self.generate_video) + + def generate_video( + self, + agent: Agent, + prompt: str, + loop: bool = False, + aspect_ratio: str = "16:9", + keyframes: Optional[Dict[str, Any]] = None, + ) -> str: + """Use this function to generate a video given a prompt. + + Args: + prompt (str): A text description of the desired video. + loop (bool, optional): Whether the video should loop. Defaults to False. + aspect_ratio (str, optional): Aspect ratio of the video. 
Defaults to "16:9". + keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension. + + Returns: + str: A message indicating if the video has been generated successfully or an error message. + """ + if not self.api_key: + return "Please set the LUMAAI_API_KEY" + + try: + # Create generation request + generation_params = { + "prompt": prompt, + "loop": loop, + "aspect_ratio": aspect_ratio, + } + if keyframes: + generation_params["keyframes"] = keyframes + + logger.debug(f"Generating video with params: {generation_params}") + generation = self.client.generations.create(**generation_params) + + if not self.wait_for_completion: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + + # Poll for completion + completed = False + seconds_waited = 0 + while not completed and seconds_waited < self.max_wait_time: + generation = self.client.generations.get(id=generation.id) + + if generation.state == "completed": + completed = True + video_url = generation.assets.video + agent.add_video(json.dumps({ + "id": generation.id, + "url": video_url, + "state": "completed" + })) + return f"Video generated successfully: {video_url}" + elif generation.state == "failed": + return f"Generation failed: {generation.failure_reason}" + + logger.info(f"Generation in progress... State: {generation.state}") + time.sleep(self.poll_interval) + seconds_waited += self.poll_interval + + if not completed: + return f"Video generation timed out after {self.max_wait_time} seconds" + + except Exception as e: + logger.error(f"Failed to generate video: {e}") + return f"Error: {e}" From d438e3a6b81a0ba667807a7f056bf403b20ff5a0 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 14:53:22 +0530 Subject: [PATCH 02/16] added image to video funcationality ,fixed formatting and mypy errors --- cookbook/tools/lumaAgent.py | 37 --------- cookbook/tools/lumalabs_tool.py | 46 +++++++++++ phi/tools/lumalab.py | 133 ++++++++++++++++++++++++-------- 3 files changed, 148 insertions(+), 68 deletions(-) delete mode 100644 cookbook/tools/lumaAgent.py create mode 100644 cookbook/tools/lumalabs_tool.py diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py deleted file mode 100644 index a09d7c22fb..0000000000 --- a/cookbook/tools/lumaAgent.py +++ /dev/null @@ -1,37 +0,0 @@ -from phi.agent import Agent -from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLab - -"""Create an agent specialized for Luma AI video generation""" - -luma_agent = Agent( - name="Luma Video Agent", - agent_id="luma-video-agent", - llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLab()], # Using the LumaLab tool we created - markdown=True, - debug_mode=True, - show_tool_calls=True, - instructions=[ - "You are an agent designed to generate videos using the Luma AI API.", - "When asked to generate a video, use the generate_video function from the LumaLab toolkit.", - "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.", - "The default parameters are:", - "- loop: False", - "- aspect_ratio: '16:9'", - "- keyframes: None", - "After generating the video, display the video URL in markdown format.", - "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.", - "If any errors occur during generation, clearly communicate them to the user.", - ], - system_message=( - "Do not modify any default parameters of the generate_video function " - 
"unless explicitly specified in the user's request. Always provide clear " - "feedback about the video generation status." - ), -) - -luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves") - - - diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py new file mode 100644 index 0000000000..bfa9f60435 --- /dev/null +++ b/cookbook/tools/lumalabs_tool.py @@ -0,0 +1,46 @@ +from phi.agent import Agent +from phi.llm.openai import OpenAIChat +from phi.tools.lumalab import LumaLab + +"""Create an agent specialized for Luma AI video generation""" + +luma_agent = Agent( + name="Luma Video Agent", + agent_id="luma-video-agent", + llm=OpenAIChat(model="gpt-4o"), + tools=[LumaLab()], # Using the LumaLab tool we created + markdown=True, + debug_mode=True, + show_tool_calls=True, + instructions=[ + "You are an agent designed to generate videos using the Luma AI API.", + "You can generate videos in two ways:", + "1. Text-to-Video Generation:", + " - Use the generate_video function for creating videos from text prompts", + " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None", + "2. Image-to-Video Generation:", + " - Use the image_to_video function when starting from one or two images", + " - Required parameters: prompt, image_url", + " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", + " - The image URLs must be publicly accessible", + "After generating any video:", + "- Display the video URL in markdown format", + "- If generation is async (wait_for_completion=False), inform about the generation ID", + "- Clearly communicate any errors that occur", + "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.", + ], + system_message=( + "Use generate_video for text-to-video requests and image_to_video for image-based " + "generation. Don't modify default parameters unless specifically requested. " + "Always provide clear feedback about the video generation status." + ), +) + +# luma_agent.run("Generate a video of a car in a city") +# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg") +luma_agent.run(""" +Create a transition video between these two images: +Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 +End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 +Make it a smooth, natural movement +""") diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 7912a1ebc0..6336d2460d 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,18 +1,27 @@ import time import json from os import getenv -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, Literal, TypedDict from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger try: - from lumaai import LumaAI + from lumaai import LumaAI # type: ignore except ImportError: raise ImportError("`lumaai` not installed. 
Please install using `pip install lumaai`") +# Define types for keyframe structure +class KeyframeImage(TypedDict): + type: Literal["image"] + url: str + + +Keyframes = Dict[str, KeyframeImage] + + class LumaLab(Toolkit): def __init__( self, @@ -33,61 +42,124 @@ def __init__( self.client = LumaAI(auth_token=self.api_key) self.register(self.generate_video) + self.register(self.image_to_video) - def generate_video( + def image_to_video( self, agent: Agent, prompt: str, + image_url: str, + end_image_url: Optional[str] = None, loop: bool = False, - aspect_ratio: str = "16:9", - keyframes: Optional[Dict[str, Any]] = None, + aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", ) -> str: - """Use this function to generate a video given a prompt. + """Generate a video from one or two images with a prompt. Args: - prompt (str): A text description of the desired video. - loop (bool, optional): Whether the video should loop. Defaults to False. - aspect_ratio (str, optional): Aspect ratio of the video. Defaults to "16:9". - keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension. + agent: The agent instance + prompt: Text description of the desired video + image_url: URL of the starting image + end_image_url: Optional URL of the ending image + loop: Whether the video should loop + aspect_ratio: Aspect ratio of the output video Returns: - str: A message indicating if the video has been generated successfully or an error message. + str: Status message or error """ if not self.api_key: return "Please set the LUMAAI_API_KEY" try: - # Create generation request - generation_params = { + # Construct keyframes + keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}} + + # Add end image if provided + if end_image_url: + keyframes["frame1"] = {"type": "image", "url": end_image_url} + + # Create generation with keyframes + generation = self.client.generations.create( + prompt=prompt, + loop=loop, + aspect_ratio=aspect_ratio, + keyframes=keyframes, # type: ignore + ) + + if not self.wait_for_completion: + if generation and generation.id: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + return "Failed to start video generation: No generation ID received" + + # Poll for completion + seconds_waited = 0 + while seconds_waited < self.max_wait_time: + if not generation or not generation.id: + return "Failed to get generation ID" + + generation = self.client.generations.get(generation.id) + + if generation.state == "completed" and generation.assets: + video_url = generation.assets.video + if video_url: + agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + return f"Video generated successfully: {video_url}" + elif generation.state == "failed": + return f"Generation failed: {generation.failure_reason}" + + logger.info(f"Generation in progress... 
State: {generation.state}") + time.sleep(self.poll_interval) + seconds_waited += self.poll_interval + + return f"Video generation timed out after {self.max_wait_time} seconds" + + except Exception as e: + logger.error(f"Failed to generate video: {e}") + return f"Error: {e}" + + def generate_video( + self, + agent: Agent, + prompt: str, + loop: bool = False, + aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", + keyframes: Optional[Dict[str, Dict[str, str]]] = None, + ) -> str: + """Use this function to generate a video given a prompt.""" + if not self.api_key: + return "Please set the LUMAAI_API_KEY" + + try: + generation_params: Dict[str, Any] = { "prompt": prompt, "loop": loop, "aspect_ratio": aspect_ratio, } - if keyframes: + + if keyframes is not None: generation_params["keyframes"] = keyframes - logger.debug(f"Generating video with params: {generation_params}") - generation = self.client.generations.create(**generation_params) + generation = self.client.generations.create(**generation_params) # type: ignore if not self.wait_for_completion: - agent.add_video(json.dumps({"id": generation.id})) - return f"Video generation started with ID: {generation.id}" + if generation and generation.id: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + return "Failed to start video generation: No generation ID received" # Poll for completion - completed = False seconds_waited = 0 - while not completed and seconds_waited < self.max_wait_time: - generation = self.client.generations.get(id=generation.id) + while seconds_waited < self.max_wait_time: + if not generation or not generation.id: + return "Failed to get generation ID" + + generation = self.client.generations.get(generation.id) - if generation.state == "completed": - completed = True + if generation.state == "completed" and generation.assets: video_url = generation.assets.video - agent.add_video(json.dumps({ - "id": generation.id, - "url": video_url, - "state": "completed" - })) - return f"Video generated successfully: {video_url}" + if video_url: + agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -95,8 +167,7 @@ def generate_video( time.sleep(self.poll_interval) seconds_waited += self.poll_interval - if not completed: - return f"Video generation timed out after {self.max_wait_time} seconds" + return f"Video generation timed out after {self.max_wait_time} seconds" except Exception as e: logger.error(f"Failed to generate video: {e}") From e85e388c2330f0399b894cb4eb277166522452d8 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 16:17:22 +0530 Subject: [PATCH 03/16] updated according to comments/review --- cookbook/tools/lumalabs_tool.py | 20 ++++++++++---------- phi/tools/lumalab.py | 22 +++++++++------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py index bfa9f60435..43fe5c6f2a 100644 --- a/cookbook/tools/lumalabs_tool.py +++ b/cookbook/tools/lumalabs_tool.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLab +from phi.tools.lumalab import LumaLabToolkit """Create an agent specialized for Luma AI video generation""" @@ -8,7 +8,7 @@ name="Luma Video Agent", agent_id="luma-video-agent", 
llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLab()], # Using the LumaLab tool we created + tools=[LumaLabToolkit()], # Using the LumaLab tool we created markdown=True, debug_mode=True, show_tool_calls=True, @@ -20,7 +20,7 @@ " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None", "2. Image-to-Video Generation:", " - Use the image_to_video function when starting from one or two images", - " - Required parameters: prompt, image_url", + " - Required parameters: prompt, start_image_url", " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", " - The image URLs must be publicly accessible", "After generating any video:", @@ -36,11 +36,11 @@ ), ) -# luma_agent.run("Generate a video of a car in a city") +luma_agent.run("Generate a video of a car in a sky") # luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg") -luma_agent.run(""" -Create a transition video between these two images: -Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 -End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 -Make it a smooth, natural movement -""") +# luma_agent.run(""" +# Create a transition video between these two images: +# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 +# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 +# Make it a smooth, natural movement +# """) diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 6336d2460d..4caaf67436 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,11 +1,11 @@ import time -import json from os import getenv from typing import Optional, Dict, Any, Literal, TypedDict from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger +from phi.model.content import Video try: from lumaai import LumaAI # type: ignore @@ -22,7 +22,7 @@ class KeyframeImage(TypedDict): Keyframes = Dict[str, KeyframeImage] -class LumaLab(Toolkit): +class LumaLabToolkit(Toolkit): def __init__( self, api_key: Optional[str] = None, @@ -48,7 +48,7 @@ def image_to_video( self, agent: Agent, prompt: str, - image_url: str, + start_image_url: str, end_image_url: Optional[str] = None, loop: bool = False, aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", @@ -58,7 +58,7 @@ def image_to_video( Args: agent: The agent instance prompt: Text description of the desired video - image_url: URL of the starting image + start_image_url: URL of the starting image end_image_url: Optional URL of the ending image loop: Whether the video should loop aspect_ratio: Aspect ratio of the output video @@ -66,12 +66,10 @@ def image_to_video( Returns: str: Status message or error """ - if not self.api_key: - return "Please set the LUMAAI_API_KEY" try: # Construct keyframes - keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}} + keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": start_image_url}} # Add end image if provided if end_image_url: @@ -87,7 +85,7 @@ def image_to_video( if not self.wait_for_completion: if 
generation and generation.id: - agent.add_video(json.dumps({"id": generation.id})) + agent.add_video(Video(id=generation.id)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -102,7 +100,7 @@ def image_to_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + agent.add_video(Video(id=generation.id, url=video_url, eta="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -126,8 +124,6 @@ def generate_video( keyframes: Optional[Dict[str, Dict[str, str]]] = None, ) -> str: """Use this function to generate a video given a prompt.""" - if not self.api_key: - return "Please set the LUMAAI_API_KEY" try: generation_params: Dict[str, Any] = { @@ -143,7 +139,7 @@ def generate_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(json.dumps({"id": generation.id})) + agent.add_video(Video(id=generation.id)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -158,7 +154,7 @@ def generate_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + agent.add_video(Video(id=generation.id, url=video_url, state="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" From 47f59f1ce2db4228b0dd2b4345fd951505928fb2 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 16:24:08 +0530 Subject: [PATCH 04/16] formatting --- phi/tools/lumalab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 4caaf67436..32723007de 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -85,7 +85,7 @@ def image_to_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(Video(id=generation.id)) + agent.add_video(Video(id=generation.id, url=None)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -139,7 +139,7 @@ def generate_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(Video(id=generation.id)) + agent.add_video(Video(id=generation.id, url=None)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" From b73b71d16d34212044018fc7d2575f01520f6558 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 14:00:08 +0000 Subject: [PATCH 05/16] README --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 50a5b995be..677104fcf4 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,6 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning style="border-radius: 8px;" /> -# What is phidata? 
- **Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.** ## Install @@ -27,7 +25,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning pip install -U phidata ``` -# Key Features +## Key Features - [Simple & Elegant](#simple--elegant) - [Powerful & Flexible](#powerful--flexible) From 2473a1c247c1fc67d482fa4e7549ba428c470b7d Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 14:03:20 +0000 Subject: [PATCH 06/16] README --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 677104fcf4..bea3e57fda 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

-Build multi-modal Agents with memory, knowledge, tools and reasoning +A framework for building multi-modal Agents with memory, knowledge, tools and reasoning

-**Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.** - ## Install ```shell From 2224385b82ae8621196cd4e1ece3a2a1f693af21 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 14:06:46 +0000 Subject: [PATCH 07/16] README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bea3e57fda..0f7fda807b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

-A framework for building multi-modal Agents with memory, knowledge, tools and reasoning +Building multi-modal Agents with memory, knowledge, tools and reasoning. Chat with them using a beautiful Agent UI.

Date: Tue, 10 Dec 2024 14:07:18 +0000 Subject: [PATCH 08/16] README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f7fda807b..fd0f95c50e 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

-Building multi-modal Agents with memory, knowledge, tools and reasoning. Chat with them using a beautiful Agent UI. +Building multi-modal Agents with memory, knowledge, tools and reasoning.

Date: Tue, 10 Dec 2024 14:32:41 +0000 Subject: [PATCH 09/16] README --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd0f95c50e..5f70c90895 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

-Building multi-modal Agents with memory, knowledge, tools and reasoning. +Build multi-modal Agents with memory, knowledge, tools and reasoning.

+## What is phidata? + +**Phidata is a framework for building multi-modal agents**, use phidata to: + +- **Build multi-modal agents with memory, knowledge, tools and reasoning.** +- **Build teams of agents that can work together to solve problems.** +- **Chat with your agents using a beautiful Agent UI.** + ## Install ```shell From 4a2c492dd014a03ccc3d90035915c9778c84f4d6 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 15:03:21 +0000 Subject: [PATCH 10/16] README --- cookbook/agents/30_pre_and_post_hooks.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cookbook/agents/30_pre_and_post_hooks.py b/cookbook/agents/30_pre_and_post_hooks.py index 900387b8d1..97a8bcab0b 100644 --- a/cookbook/agents/30_pre_and_post_hooks.py +++ b/cookbook/agents/30_pre_and_post_hooks.py @@ -6,16 +6,16 @@ from phi.tools import tool, FunctionCall -def pre_hook(function_call: FunctionCall): - print(f"Pre-hook: {function_call.function.name}") - print(f"Arguments: {function_call.arguments}") - print(f"Result: {function_call.result}") +def pre_hook(fc: FunctionCall): + print(f"Pre-hook: {fc.function.name}") + print(f"Arguments: {fc.arguments}") + print(f"Result: {fc.result}") -def post_hook(function_call: FunctionCall): - print(f"Post-hook: {function_call.function.name}") - print(f"Arguments: {function_call.arguments}") - print(f"Result: {function_call.result}") +def post_hook(fc: FunctionCall): + print(f"Post-hook: {fc.function.name}") + print(f"Arguments: {fc.arguments}") + print(f"Result: {fc.result}") @tool(pre_hook=pre_hook, post_hook=post_hook) From ba59bd457434d306fcf87a419b4e6e057b9c2717 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 15:07:13 +0000 Subject: [PATCH 11/16] v2.6.6 --- phi/agent/agent.py | 2 +- phi/model/__init__.py | 1 - pyproject.toml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/phi/agent/agent.py b/phi/agent/agent.py index 9cba61c559..21b456fde4 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -31,7 +31,7 @@ from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction from phi.run.response import RunEvent, RunResponse, RunResponseExtraData from phi.knowledge.agent import AgentKnowledge -from phi.model import Model +from phi.model.base import Model from phi.model.message import Message, MessageReferences from phi.model.response import ModelResponse, ModelResponseEvent from phi.memory.agent import AgentMemory, MemoryRetrieval, Memory, AgentRun, SessionSummary # noqa: F401 diff --git a/phi/model/__init__.py b/phi/model/__init__.py index 00c37db694..e69de29bb2 100644 --- a/phi/model/__init__.py +++ b/phi/model/__init__.py @@ -1 +0,0 @@ -from phi.model.base import Model diff --git a/pyproject.toml b/pyproject.toml index fa76853404..b3c1fa0a87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "phidata" -version = "2.6.5" +version = "2.6.6" description = "Build multi-modal Agents with memory, knowledge and tools." 
requires-python = ">=3.7" readme = "README.md" From 795ee52caab25acae9cd8b7ca3499dfd3a1cf738 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 19:05:28 +0200 Subject: [PATCH 12/16] Update lumalabs to work with new interface --- cookbook/tools/lumalabs_tool.py | 11 +++++------ phi/tools/lumalab.py | 20 +++++++++----------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py index 43fe5c6f2a..8d87d31f13 100644 --- a/cookbook/tools/lumalabs_tool.py +++ b/cookbook/tools/lumalabs_tool.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLabToolkit +from phi.tools.lumalab import LumaLabTools """Create an agent specialized for Luma AI video generation""" @@ -8,7 +8,7 @@ name="Luma Video Agent", agent_id="luma-video-agent", llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLabToolkit()], # Using the LumaLab tool we created + tools=[LumaLabTools()], # Using the LumaLab tool we created markdown=True, debug_mode=True, show_tool_calls=True, @@ -23,11 +23,10 @@ " - Required parameters: prompt, start_image_url", " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", " - The image URLs must be publicly accessible", - "After generating any video:", - "- Display the video URL in markdown format", - "- If generation is async (wait_for_completion=False), inform about the generation ID", - "- Clearly communicate any errors that occur", "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.", + "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", + "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", + "After generating any video, if generation is async (wait_for_completion=False), inform about the generation ID", ], system_message=( "Use generate_video for text-to-video requests and image_to_video for image-based " diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 32723007de..bebb2b652d 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,4 +1,5 @@ import time +import uuid from os import getenv from typing import Optional, Dict, Any, Literal, TypedDict @@ -22,7 +23,7 @@ class KeyframeImage(TypedDict): Keyframes = Dict[str, KeyframeImage] -class LumaLabToolkit(Toolkit): +class LumaLabTools(Toolkit): def __init__( self, api_key: Optional[str] = None, @@ -83,11 +84,10 @@ def image_to_video( keyframes=keyframes, # type: ignore ) + video_id = str(uuid.uuid4()) + if not self.wait_for_completion: - if generation and generation.id: - agent.add_video(Video(id=generation.id, url=None)) - return f"Video generation started with ID: {generation.id}" - return "Failed to start video generation: No generation ID received" + return "Async generation unsupported" # Poll for completion seconds_waited = 0 @@ -100,7 +100,7 @@ def image_to_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(Video(id=generation.id, url=video_url, eta="completed")) + agent.add_video(Video(id=video_id, url=video_url, eta="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -137,11 +137,9 @@ def generate_video( generation = 
self.client.generations.create(**generation_params) # type: ignore + video_id = str(uuid.uuid4()) if not self.wait_for_completion: - if generation and generation.id: - agent.add_video(Video(id=generation.id, url=None)) - return f"Video generation started with ID: {generation.id}" - return "Failed to start video generation: No generation ID received" + return "Async generation unsupported" # Poll for completion seconds_waited = 0 @@ -154,7 +152,7 @@ def generate_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(Video(id=generation.id, url=video_url, state="completed")) + agent.add_video(Video(id=video_id, url=video_url, state="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" From 33715d3cfbc75c06e30815331ce011558c0c03b7 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 22:42:43 +0000 Subject: [PATCH 13/16] README --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5f70c90895..9d87332bb2 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning. pip install -U phidata ``` -## Key Features +## Examples & Key Features - [Simple & Elegant](#simple--elegant) - [Powerful & Flexible](#powerful--flexible) @@ -46,7 +46,9 @@ pip install -U phidata ## Simple & Elegant -Phidata Agents are simple and elegant, resulting in minimal, beautiful code. For example, you can create a web search agent using 10 lines of code, create a file `web_search.py` +Phidata Agents are simple and elegant, resulting in minimal, beautiful code. + +For example, you can create a web search agent in 10 lines of code, create a file `web_search.py` ```python from phi.agent import Agent @@ -75,7 +77,9 @@ python web_search.py ## Powerful & Flexible -Phidata agents can use multiple tools and follow instructions to achieve complex tasks. For example, you can create a finance agent that can query financial data, create a file `finance_agent.py` +Phidata agents can use multiple tools and follow instructions to achieve complex tasks. + +For example, you can create a finance agent with tools to query financial data, create a file `finance_agent.py` ```python from phi.agent import Agent @@ -103,7 +107,9 @@ python finance_agent.py ## Multi-Modal by default -Phidata agents support text, images, audio and video. For example, you can create an image agent that can understand images and make tool calls as needed, create a file `image_agent.py` +Phidata agents support text, images, audio and video. + +For example, you can create an image agent that can understand images and make tool calls as needed, create a file `image_agent.py` ```python from phi.agent import Agent From faf8f35c15e67db8055afca3d7ae4dead93c1ce8 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 23:06:30 +0000 Subject: [PATCH 14/16] README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d87332bb2..367f117df7 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning. 
pip install -U phidata ``` -## Examples & Key Features +## Key Features - [Simple & Elegant](#simple--elegant) - [Powerful & Flexible](#powerful--flexible) From 78cbcbf43e113b2ca4ffa6b30f4065a45ee661c5 Mon Sep 17 00:00:00 2001 From: Ashpreet Bedi Date: Tue, 10 Dec 2024 23:22:42 +0000 Subject: [PATCH 15/16] README --- README.md | 15 ++++--- cookbook/agents/43_research_agent_exa.py | 55 ++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 cookbook/agents/43_research_agent_exa.py diff --git a/README.md b/README.md index 367f117df7..569b649211 100644 --- a/README.md +++ b/README.md @@ -225,14 +225,18 @@ if __name__ == "__main__": serve_playground_app("playground:app", reload=True) ``` -Authenticate with phidata: -``` +Authenticate with phidata by running the following command: + +```shell phi auth ``` -> [!NOTE] -> If `phi auth` fails, you can set the `PHI_API_KEY` environment variable by copying it from [phidata.app](https://www.phidata.app) +or by exporting the `PHI_API_KEY` for your workspace from [phidata.app](https://www.phidata.app) + +```bash +export PHI_API_KEY=phi-*** +``` Install dependencies and run the Agent Playground: @@ -383,9 +387,6 @@ python reasoning_agent.py > > It is an experiment fueled by curiosity, combining COT and tool use. Set your expectations very low for this initial release. For example: It will not be able to count ‘r’s in ‘strawberry’. -> [!TIP] -> If using tools with `reasoning=True`, set `structured_outputs=False` because gpt-4o doesnt support tools with structured outputs. - ## Demo Agents The Agent Playground includes a few demo agents that you can test with. If you have recommendations for other demo agents, please let us know in our [community forum](https://community.phidata.com/). diff --git a/cookbook/agents/43_research_agent_exa.py b/cookbook/agents/43_research_agent_exa.py new file mode 100644 index 0000000000..4184f5276c --- /dev/null +++ b/cookbook/agents/43_research_agent_exa.py @@ -0,0 +1,55 @@ +"""Please install dependencies using: +pip install openai exa-py phidata +""" + +from textwrap import dedent +from datetime import datetime + +from phi.agent import Agent +from phi.model.openai import OpenAIChat +from phi.tools.exa import ExaTools + +agent = Agent( + model=OpenAIChat(id="gpt-4o"), + tools=[ExaTools(start_published_date=datetime.now().strftime("%Y-%m-%d"), type="keyword")], + description="You are an advanced AI researcher writing a report on a topic.", + instructions=[ + "For the provided topic, run 3 different searches.", + "Read the results carefully and prepare a NYT worthy report.", + "Focus on facts and make sure to provide references.", + ], + expected_output=dedent("""\ + An engaging, informative, and well-structured report in markdown format: + + ## Engaging Report Title + + ### Overview + {give a brief introduction of the report and why the user should read this report} + {make this section engaging and create a hook for the reader} + + ### Section 1 + {break the report into sections} + {provide details/facts/processes in this section} + + ... more sections as necessary... 
+
+    ### Takeaways
+    {provide key takeaways from the article}
+
+    ### References
+    - [Reference 1](link)
+    - [Reference 2](link)
+    - [Reference 3](link)
+
+    ### About the Author
+    {write a made up bio for yourself, give yourself a cyberpunk name and a title}
+
+    - published on {date} in dd/mm/yyyy
+    """),
+    markdown=True,
+    show_tool_calls=True,
+    add_datetime_to_instructions=True,
+    save_response_to_file="tmp/{message}.md",
+    # debug_mode=True,
+)
+agent.print_response("Simulation theory", stream=True)

From d10bc9cc9166248ff910a48ab7e233c7be8a6a52 Mon Sep 17 00:00:00 2001
From: Dirk Brand
Date: Wed, 11 Dec 2024 12:42:51 +0200
Subject: [PATCH 16/16] Fix style

---
 cookbook/playground/multimodal_agent.py | 9 ++++-----
 phi/llm/openai/chat.py                  | 4 +++-
 phi/model/openai/chat.py                | 4 +++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
index 104177972e..25f0405684 100644
--- a/cookbook/playground/multimodal_agent.py
+++ b/cookbook/playground/multimodal_agent.py
@@ -14,7 +14,6 @@
 from phi.playground import Playground, serve_playground_app
 from phi.storage.agent.sqlite import SqlAgentStorage
 from phi.tools.fal_tools import FalTools
-from pydantic import BaseModel, Field

 image_agent_storage_file: str = "tmp/image_agent.db"

@@ -26,7 +25,7 @@
     description="You are an AI agent that can generate images using DALL-E.",
     instructions=[
         "When the user asks you to create an image, use the `create_image` tool to create the image.",
-        "Don't provide the URL of the image in the response. Only describe what image was generated."
+        "Don't provide the URL of the image in the response. Only describe what image was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -43,7 +42,7 @@
     description="You are an AI agent that can generate gifs using the ModelsLabs API.",
     instructions=[
         "When the user asks you to create an image, use the `generate_media` tool to create the image.",
-        "Don't provide the URL of the image in the response. Only describe what image was generated."
+        "Don't provide the URL of the image in the response. Only describe what image was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -60,7 +59,7 @@
     description="You are an AI agent that can generate videos using the ModelsLabs API.",
     instructions=[
         "When the user asks you to create a video, use the `generate_media` tool to create the video.",
-        "Don't provide the URL of the video in the response. Only describe what video was generated."
+        "Don't provide the URL of the video in the response. Only describe what video was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -77,7 +76,7 @@
     description="You are an AI agent that can generate videos using the Fal API.",
     instructions=[
         "When the user asks you to create a video, use the `generate_media` tool to create the video.",
-        "Don't provide the URL of the video in the response. Only describe what video was generated."
+        "Don't provide the URL of the video in the response. 
Only describe what video was generated.", ], markdown=True, debug_mode=True, diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py index 60b3fe2e3c..666313522d 100644 --- a/phi/llm/openai/chat.py +++ b/phi/llm/openai/chat.py @@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty: _dict["presence_penalty"] = self.presence_penalty if self.response_format: - _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + _dict["response_format"] = ( + self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + ) if self.seed is not None: _dict["seed"] = self.seed if self.stop: diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py index 66dbf6242f..ef177512c8 100644 --- a/phi/model/openai/chat.py +++ b/phi/model/openai/chat.py @@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty is not None: model_dict["presence_penalty"] = self.presence_penalty if self.response_format is not None: - model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + model_dict["response_format"] = ( + self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + ) if self.seed is not None: model_dict["seed"] = self.seed if self.stop is not None:
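For readers following along, below is a minimal usage sketch of the toolkit this patch series lands. It is illustrative only and not part of the diffs above: it assumes the post-rename `LumaLabTools` class from PATCH 12 is installed, and that `LUMAAI_API_KEY` and an OpenAI API key are set in the environment. Names follow the cookbook files in the series.

```python
# Minimal sketch of driving the Luma toolkit from an agent (assumes the
# LumaLabTools class from PATCH 12 and a LUMAAI_API_KEY in the environment).
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

luma_agent = Agent(
    llm=OpenAIChat(model="gpt-4o"),
    tools=[LumaLabTools()],  # registers generate_video and image_to_video
    show_tool_calls=True,
    markdown=True,
)

# Text-to-video: the model should call generate_video with the defaults
# (loop=False, aspect_ratio="16:9", keyframes=None).
luma_agent.print_response("Generate a short video of waves rolling onto a beach at sunset")
```

Run it as a script; `generate_video` polls Luma until the clip completes (up to `max_wait_time`, 300 seconds by default) and attaches the resulting URL to the agent via `agent.add_video`.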