From 0a739bc707b6d4a8a71b946e529b8ab004135773 Mon Sep 17 00:00:00 2001
From: ayush0054
Date: Tue, 10 Dec 2024 01:59:23 +0530
Subject: [PATCH 01/16] Luma Labs video generation
---
cookbook/tools/lumaAgent.py | 37 +++++++++++++
phi/tools/lumalab.py | 103 ++++++++++++++++++++++++++++++++++++
2 files changed, 140 insertions(+)
create mode 100644 cookbook/tools/lumaAgent.py
create mode 100644 phi/tools/lumalab.py
diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py
new file mode 100644
index 0000000000..a09d7c22fb
--- /dev/null
+++ b/cookbook/tools/lumaAgent.py
@@ -0,0 +1,37 @@
+from phi.agent import Agent
+from phi.llm.openai import OpenAIChat
+from phi.tools.lumalab import LumaLab
+
+"""Create an agent specialized for Luma AI video generation"""
+
+luma_agent = Agent(
+ name="Luma Video Agent",
+ agent_id="luma-video-agent",
+ llm=OpenAIChat(model="gpt-4o"),
+ tools=[LumaLab()], # Using the LumaLab tool we created
+ markdown=True,
+ debug_mode=True,
+ show_tool_calls=True,
+ instructions=[
+ "You are an agent designed to generate videos using the Luma AI API.",
+ "When asked to generate a video, use the generate_video function from the LumaLab toolkit.",
+ "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.",
+ "The default parameters are:",
+ "- loop: False",
+ "- aspect_ratio: '16:9'",
+ "- keyframes: None",
+ "After generating the video, display the video URL in markdown format.",
+ "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.",
+ "If any errors occur during generation, clearly communicate them to the user.",
+ ],
+ system_message=(
+ "Do not modify any default parameters of the generate_video function "
+ "unless explicitly specified in the user's request. Always provide clear "
+ "feedback about the video generation status."
+ ),
+)
+
+luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves")
+
+
+
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
new file mode 100644
index 0000000000..7912a1ebc0
--- /dev/null
+++ b/phi/tools/lumalab.py
@@ -0,0 +1,103 @@
+import time
+import json
+from os import getenv
+from typing import Optional, Dict, Any
+
+from phi.agent import Agent
+from phi.tools import Toolkit
+from phi.utils.log import logger
+
+try:
+ from lumaai import LumaAI
+except ImportError:
+ raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`")
+
+
+class LumaLab(Toolkit):
+ def __init__(
+ self,
+ api_key: Optional[str] = None,
+ wait_for_completion: bool = True,
+ poll_interval: int = 3,
+ max_wait_time: int = 300, # 5 minutes
+ ):
+ super().__init__(name="luma_lab")
+
+ self.wait_for_completion = wait_for_completion
+ self.poll_interval = poll_interval
+ self.max_wait_time = max_wait_time
+ self.api_key = api_key or getenv("LUMAAI_API_KEY")
+
+ if not self.api_key:
+ logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.")
+
+ self.client = LumaAI(auth_token=self.api_key)
+ self.register(self.generate_video)
+
+ def generate_video(
+ self,
+ agent: Agent,
+ prompt: str,
+ loop: bool = False,
+ aspect_ratio: str = "16:9",
+ keyframes: Optional[Dict[str, Any]] = None,
+ ) -> str:
+ """Use this function to generate a video given a prompt.
+
+ Args:
+ prompt (str): A text description of the desired video.
+ loop (bool, optional): Whether the video should loop. Defaults to False.
+ aspect_ratio (str, optional): Aspect ratio of the video. Defaults to "16:9".
+ keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension.
+
+ Returns:
+ str: A message indicating if the video has been generated successfully or an error message.
+ """
+ if not self.api_key:
+ return "Please set the LUMAAI_API_KEY"
+
+ try:
+ # Create generation request
+ generation_params = {
+ "prompt": prompt,
+ "loop": loop,
+ "aspect_ratio": aspect_ratio,
+ }
+ if keyframes:
+ generation_params["keyframes"] = keyframes
+
+ logger.debug(f"Generating video with params: {generation_params}")
+ generation = self.client.generations.create(**generation_params)
+
+ if not self.wait_for_completion:
+ agent.add_video(json.dumps({"id": generation.id}))
+ return f"Video generation started with ID: {generation.id}"
+
+ # Poll for completion
+ completed = False
+ seconds_waited = 0
+ while not completed and seconds_waited < self.max_wait_time:
+ generation = self.client.generations.get(id=generation.id)
+
+ if generation.state == "completed":
+ completed = True
+ video_url = generation.assets.video
+ agent.add_video(json.dumps({
+ "id": generation.id,
+ "url": video_url,
+ "state": "completed"
+ }))
+ return f"Video generated successfully: {video_url}"
+ elif generation.state == "failed":
+ return f"Generation failed: {generation.failure_reason}"
+
+ logger.info(f"Generation in progress... State: {generation.state}")
+ time.sleep(self.poll_interval)
+ seconds_waited += self.poll_interval
+
+ if not completed:
+ return f"Video generation timed out after {self.max_wait_time} seconds"
+
+ except Exception as e:
+ logger.error(f"Failed to generate video: {e}")
+ return f"Error: {e}"
From d438e3a6b81a0ba667807a7f056bf403b20ff5a0 Mon Sep 17 00:00:00 2001
From: ayush0054
Date: Tue, 10 Dec 2024 14:53:22 +0530
Subject: [PATCH 02/16] added image-to-video functionality, fixed formatting
 and mypy errors
---
cookbook/tools/lumaAgent.py | 37 ---------
cookbook/tools/lumalabs_tool.py | 46 +++++++++++
phi/tools/lumalab.py | 133 ++++++++++++++++++++++++--------
3 files changed, 148 insertions(+), 68 deletions(-)
delete mode 100644 cookbook/tools/lumaAgent.py
create mode 100644 cookbook/tools/lumalabs_tool.py
diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py
deleted file mode 100644
index a09d7c22fb..0000000000
--- a/cookbook/tools/lumaAgent.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from phi.agent import Agent
-from phi.llm.openai import OpenAIChat
-from phi.tools.lumalab import LumaLab
-
-"""Create an agent specialized for Luma AI video generation"""
-
-luma_agent = Agent(
- name="Luma Video Agent",
- agent_id="luma-video-agent",
- llm=OpenAIChat(model="gpt-4o"),
- tools=[LumaLab()], # Using the LumaLab tool we created
- markdown=True,
- debug_mode=True,
- show_tool_calls=True,
- instructions=[
- "You are an agent designed to generate videos using the Luma AI API.",
- "When asked to generate a video, use the generate_video function from the LumaLab toolkit.",
- "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.",
- "The default parameters are:",
- "- loop: False",
- "- aspect_ratio: '16:9'",
- "- keyframes: None",
- "After generating the video, display the video URL in markdown format.",
- "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.",
- "If any errors occur during generation, clearly communicate them to the user.",
- ],
- system_message=(
- "Do not modify any default parameters of the generate_video function "
- "unless explicitly specified in the user's request. Always provide clear "
- "feedback about the video generation status."
- ),
-)
-
-luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves")
-
-
-
diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py
new file mode 100644
index 0000000000..bfa9f60435
--- /dev/null
+++ b/cookbook/tools/lumalabs_tool.py
@@ -0,0 +1,46 @@
+from phi.agent import Agent
+from phi.llm.openai import OpenAIChat
+from phi.tools.lumalab import LumaLab
+
+"""Create an agent specialized for Luma AI video generation"""
+
+luma_agent = Agent(
+ name="Luma Video Agent",
+ agent_id="luma-video-agent",
+ llm=OpenAIChat(model="gpt-4o"),
+ tools=[LumaLab()], # Using the LumaLab tool we created
+ markdown=True,
+ debug_mode=True,
+ show_tool_calls=True,
+ instructions=[
+ "You are an agent designed to generate videos using the Luma AI API.",
+ "You can generate videos in two ways:",
+ "1. Text-to-Video Generation:",
+ " - Use the generate_video function for creating videos from text prompts",
+ " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
+ "2. Image-to-Video Generation:",
+ " - Use the image_to_video function when starting from one or two images",
+ " - Required parameters: prompt, image_url",
+ " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
+ " - The image URLs must be publicly accessible",
+ "After generating any video:",
+ "- Display the video URL in markdown format",
+ "- If generation is async (wait_for_completion=False), inform about the generation ID",
+ "- Clearly communicate any errors that occur",
+ "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
+ ],
+ system_message=(
+ "Use generate_video for text-to-video requests and image_to_video for image-based "
+ "generation. Don't modify default parameters unless specifically requested. "
+ "Always provide clear feedback about the video generation status."
+ ),
+)
+
+# luma_agent.run("Generate a video of a car in a city")
+# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
+luma_agent.run("""
+Create a transition video between these two images:
+Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
+End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
+Make it a smooth, natural movement
+""")
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
index 7912a1ebc0..6336d2460d 100644
--- a/phi/tools/lumalab.py
+++ b/phi/tools/lumalab.py
@@ -1,18 +1,27 @@
import time
import json
from os import getenv
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Literal, TypedDict
from phi.agent import Agent
from phi.tools import Toolkit
from phi.utils.log import logger
try:
- from lumaai import LumaAI
+ from lumaai import LumaAI # type: ignore
except ImportError:
raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`")
+# Define types for keyframe structure
+class KeyframeImage(TypedDict):
+ type: Literal["image"]
+ url: str
+
+
+Keyframes = Dict[str, KeyframeImage]
+
+
class LumaLab(Toolkit):
def __init__(
self,
@@ -33,61 +42,124 @@ def __init__(
self.client = LumaAI(auth_token=self.api_key)
self.register(self.generate_video)
+ self.register(self.image_to_video)
- def generate_video(
+ def image_to_video(
self,
agent: Agent,
prompt: str,
+ image_url: str,
+ end_image_url: Optional[str] = None,
loop: bool = False,
- aspect_ratio: str = "16:9",
- keyframes: Optional[Dict[str, Any]] = None,
+ aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
) -> str:
- """Use this function to generate a video given a prompt.
+ """Generate a video from one or two images with a prompt.
Args:
- prompt (str): A text description of the desired video.
- loop (bool, optional): Whether the video should loop. Defaults to False.
- aspect_ratio (str, optional): Aspect ratio of the video. Defaults to "16:9".
- keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension.
+ agent: The agent instance
+ prompt: Text description of the desired video
+ image_url: URL of the starting image
+ end_image_url: Optional URL of the ending image
+ loop: Whether the video should loop
+ aspect_ratio: Aspect ratio of the output video
Returns:
- str: A message indicating if the video has been generated successfully or an error message.
+ str: Status message or error
"""
if not self.api_key:
return "Please set the LUMAAI_API_KEY"
try:
- # Create generation request
- generation_params = {
+ # Construct keyframes
+ keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}}
+
+ # Add end image if provided
+ if end_image_url:
+ keyframes["frame1"] = {"type": "image", "url": end_image_url}
+
+ # Create generation with keyframes
+ generation = self.client.generations.create(
+ prompt=prompt,
+ loop=loop,
+ aspect_ratio=aspect_ratio,
+ keyframes=keyframes, # type: ignore
+ )
+
+ if not self.wait_for_completion:
+ if generation and generation.id:
+ agent.add_video(json.dumps({"id": generation.id}))
+ return f"Video generation started with ID: {generation.id}"
+ return "Failed to start video generation: No generation ID received"
+
+ # Poll for completion
+ seconds_waited = 0
+ while seconds_waited < self.max_wait_time:
+ if not generation or not generation.id:
+ return "Failed to get generation ID"
+
+ generation = self.client.generations.get(generation.id)
+
+ if generation.state == "completed" and generation.assets:
+ video_url = generation.assets.video
+ if video_url:
+ agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"}))
+ return f"Video generated successfully: {video_url}"
+ elif generation.state == "failed":
+ return f"Generation failed: {generation.failure_reason}"
+
+ logger.info(f"Generation in progress... State: {generation.state}")
+ time.sleep(self.poll_interval)
+ seconds_waited += self.poll_interval
+
+ return f"Video generation timed out after {self.max_wait_time} seconds"
+
+ except Exception as e:
+ logger.error(f"Failed to generate video: {e}")
+ return f"Error: {e}"
+
+ def generate_video(
+ self,
+ agent: Agent,
+ prompt: str,
+ loop: bool = False,
+ aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
+ keyframes: Optional[Dict[str, Dict[str, str]]] = None,
+ ) -> str:
+ """Use this function to generate a video given a prompt."""
+ if not self.api_key:
+ return "Please set the LUMAAI_API_KEY"
+
+ try:
+ generation_params: Dict[str, Any] = {
"prompt": prompt,
"loop": loop,
"aspect_ratio": aspect_ratio,
}
- if keyframes:
+
+ if keyframes is not None:
generation_params["keyframes"] = keyframes
- logger.debug(f"Generating video with params: {generation_params}")
- generation = self.client.generations.create(**generation_params)
+ generation = self.client.generations.create(**generation_params) # type: ignore
if not self.wait_for_completion:
- agent.add_video(json.dumps({"id": generation.id}))
- return f"Video generation started with ID: {generation.id}"
+ if generation and generation.id:
+ agent.add_video(json.dumps({"id": generation.id}))
+ return f"Video generation started with ID: {generation.id}"
+ return "Failed to start video generation: No generation ID received"
# Poll for completion
- completed = False
seconds_waited = 0
- while not completed and seconds_waited < self.max_wait_time:
- generation = self.client.generations.get(id=generation.id)
+ while seconds_waited < self.max_wait_time:
+ if not generation or not generation.id:
+ return "Failed to get generation ID"
+
+ generation = self.client.generations.get(generation.id)
- if generation.state == "completed":
- completed = True
+ if generation.state == "completed" and generation.assets:
video_url = generation.assets.video
- agent.add_video(json.dumps({
- "id": generation.id,
- "url": video_url,
- "state": "completed"
- }))
- return f"Video generated successfully: {video_url}"
+ if video_url:
+ agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"}))
+ return f"Video generated successfully: {video_url}"
elif generation.state == "failed":
return f"Generation failed: {generation.failure_reason}"
@@ -95,8 +167,7 @@ def generate_video(
time.sleep(self.poll_interval)
seconds_waited += self.poll_interval
- if not completed:
- return f"Video generation timed out after {self.max_wait_time} seconds"
+ return f"Video generation timed out after {self.max_wait_time} seconds"
except Exception as e:
logger.error(f"Failed to generate video: {e}")
From e85e388c2330f0399b894cb4eb277166522452d8 Mon Sep 17 00:00:00 2001
From: ayush0054
Date: Tue, 10 Dec 2024 16:17:22 +0530
Subject: [PATCH 03/16] updated according to review comments
---
cookbook/tools/lumalabs_tool.py | 20 ++++++++++----------
phi/tools/lumalab.py | 22 +++++++++-------------
2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py
index bfa9f60435..43fe5c6f2a 100644
--- a/cookbook/tools/lumalabs_tool.py
+++ b/cookbook/tools/lumalabs_tool.py
@@ -1,6 +1,6 @@
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
-from phi.tools.lumalab import LumaLab
+from phi.tools.lumalab import LumaLabToolkit
"""Create an agent specialized for Luma AI video generation"""
@@ -8,7 +8,7 @@
name="Luma Video Agent",
agent_id="luma-video-agent",
llm=OpenAIChat(model="gpt-4o"),
- tools=[LumaLab()], # Using the LumaLab tool we created
+ tools=[LumaLabToolkit()], # Using the LumaLab tool we created
markdown=True,
debug_mode=True,
show_tool_calls=True,
@@ -20,7 +20,7 @@
" - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
"2. Image-to-Video Generation:",
" - Use the image_to_video function when starting from one or two images",
- " - Required parameters: prompt, image_url",
+ " - Required parameters: prompt, start_image_url",
" - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
" - The image URLs must be publicly accessible",
"After generating any video:",
@@ -36,11 +36,11 @@
),
)
-# luma_agent.run("Generate a video of a car in a city")
+luma_agent.run("Generate a video of a car in a sky")
# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
-luma_agent.run("""
-Create a transition video between these two images:
-Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
-End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
-Make it a smooth, natural movement
-""")
+# luma_agent.run("""
+# Create a transition video between these two images:
+# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
+# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
+# Make it a smooth, natural movement
+# """)
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
index 6336d2460d..4caaf67436 100644
--- a/phi/tools/lumalab.py
+++ b/phi/tools/lumalab.py
@@ -1,11 +1,11 @@
import time
-import json
from os import getenv
from typing import Optional, Dict, Any, Literal, TypedDict
from phi.agent import Agent
from phi.tools import Toolkit
from phi.utils.log import logger
+from phi.model.content import Video
try:
from lumaai import LumaAI # type: ignore
@@ -22,7 +22,7 @@ class KeyframeImage(TypedDict):
Keyframes = Dict[str, KeyframeImage]
-class LumaLab(Toolkit):
+class LumaLabToolkit(Toolkit):
def __init__(
self,
api_key: Optional[str] = None,
@@ -48,7 +48,7 @@ def image_to_video(
self,
agent: Agent,
prompt: str,
- image_url: str,
+ start_image_url: str,
end_image_url: Optional[str] = None,
loop: bool = False,
aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
@@ -58,7 +58,7 @@ def image_to_video(
Args:
agent: The agent instance
prompt: Text description of the desired video
- image_url: URL of the starting image
+ start_image_url: URL of the starting image
end_image_url: Optional URL of the ending image
loop: Whether the video should loop
aspect_ratio: Aspect ratio of the output video
@@ -66,12 +66,10 @@ def image_to_video(
Returns:
str: Status message or error
"""
- if not self.api_key:
- return "Please set the LUMAAI_API_KEY"
try:
# Construct keyframes
- keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}}
+ keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": start_image_url}}
# Add end image if provided
if end_image_url:
@@ -87,7 +85,7 @@ def image_to_video(
if not self.wait_for_completion:
if generation and generation.id:
- agent.add_video(json.dumps({"id": generation.id}))
+ agent.add_video(Video(id=generation.id))
return f"Video generation started with ID: {generation.id}"
return "Failed to start video generation: No generation ID received"
@@ -102,7 +100,7 @@ def image_to_video(
if generation.state == "completed" and generation.assets:
video_url = generation.assets.video
if video_url:
- agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"}))
+ agent.add_video(Video(id=generation.id, url=video_url, eta="completed"))
return f"Video generated successfully: {video_url}"
elif generation.state == "failed":
return f"Generation failed: {generation.failure_reason}"
@@ -126,8 +124,6 @@ def generate_video(
keyframes: Optional[Dict[str, Dict[str, str]]] = None,
) -> str:
"""Use this function to generate a video given a prompt."""
- if not self.api_key:
- return "Please set the LUMAAI_API_KEY"
try:
generation_params: Dict[str, Any] = {
@@ -143,7 +139,7 @@ def generate_video(
if not self.wait_for_completion:
if generation and generation.id:
- agent.add_video(json.dumps({"id": generation.id}))
+ agent.add_video(Video(id=generation.id))
return f"Video generation started with ID: {generation.id}"
return "Failed to start video generation: No generation ID received"
@@ -158,7 +154,7 @@ def generate_video(
if generation.state == "completed" and generation.assets:
video_url = generation.assets.video
if video_url:
- agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"}))
+ agent.add_video(Video(id=generation.id, url=video_url, state="completed"))
return f"Video generated successfully: {video_url}"
elif generation.state == "failed":
return f"Generation failed: {generation.failure_reason}"
From 47f59f1ce2db4228b0dd2b4345fd951505928fb2 Mon Sep 17 00:00:00 2001
From: ayush0054
Date: Tue, 10 Dec 2024 16:24:08 +0530
Subject: [PATCH 04/16] formatting
---
phi/tools/lumalab.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
index 4caaf67436..32723007de 100644
--- a/phi/tools/lumalab.py
+++ b/phi/tools/lumalab.py
@@ -85,7 +85,7 @@ def image_to_video(
if not self.wait_for_completion:
if generation and generation.id:
- agent.add_video(Video(id=generation.id))
+ agent.add_video(Video(id=generation.id, url=None))
return f"Video generation started with ID: {generation.id}"
return "Failed to start video generation: No generation ID received"
@@ -139,7 +139,7 @@ def generate_video(
if not self.wait_for_completion:
if generation and generation.id:
- agent.add_video(Video(id=generation.id))
+ agent.add_video(Video(id=generation.id, url=None))
return f"Video generation started with ID: {generation.id}"
return "Failed to start video generation: No generation ID received"
From b73b71d16d34212044018fc7d2575f01520f6558 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 14:00:08 +0000
Subject: [PATCH 05/16] README
---
README.md | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 50a5b995be..677104fcf4 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,6 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning
style="border-radius: 8px;"
/>
-# What is phidata?
-
**Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.**
## Install
@@ -27,7 +25,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning
pip install -U phidata
```
-# Key Features
+## Key Features
- [Simple & Elegant](#simple--elegant)
- [Powerful & Flexible](#powerful--flexible)
From 2473a1c247c1fc67d482fa4e7549ba428c470b7d Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 14:03:20 +0000
Subject: [PATCH 06/16] README
---
README.md | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 677104fcf4..bea3e57fda 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-Build multi-modal Agents with memory, knowledge, tools and reasoning
+A framework for building multi-modal Agents with memory, knowledge, tools and reasoning
-**Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.**
-
## Install
```shell
From 2224385b82ae8621196cd4e1ece3a2a1f693af21 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 14:06:46 +0000
Subject: [PATCH 07/16] README
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index bea3e57fda..0f7fda807b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-A framework for building multi-modal Agents with memory, knowledge, tools and reasoning
+Building multi-modal Agents with memory, knowledge, tools and reasoning. Chat with them using a beautiful Agent UI.
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 14:07:18 +0000
Subject: [PATCH 08/16] README
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 0f7fda807b..fd0f95c50e 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-Building multi-modal Agents with memory, knowledge, tools and reasoning. Chat with them using a beautiful Agent UI.
+Building multi-modal Agents with memory, knowledge, tools and reasoning.
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 14:32:41 +0000
Subject: [PATCH 09/16] README
---
README.md | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index fd0f95c50e..5f70c90895 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-Building multi-modal Agents with memory, knowledge, tools and reasoning.
+Build multi-modal Agents with memory, knowledge, tools and reasoning.
+## What is phidata?
+
+**Phidata is a framework for building multi-modal agents**, use phidata to:
+
+- **Build multi-modal agents with memory, knowledge, tools and reasoning.**
+- **Build teams of agents that can work together to solve problems.**
+- **Chat with your agents using a beautiful Agent UI.**
+
## Install
```shell
From 4a2c492dd014a03ccc3d90035915c9778c84f4d6 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 15:03:21 +0000
Subject: [PATCH 10/16] README
---
cookbook/agents/30_pre_and_post_hooks.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/cookbook/agents/30_pre_and_post_hooks.py b/cookbook/agents/30_pre_and_post_hooks.py
index 900387b8d1..97a8bcab0b 100644
--- a/cookbook/agents/30_pre_and_post_hooks.py
+++ b/cookbook/agents/30_pre_and_post_hooks.py
@@ -6,16 +6,16 @@
from phi.tools import tool, FunctionCall
-def pre_hook(function_call: FunctionCall):
- print(f"Pre-hook: {function_call.function.name}")
- print(f"Arguments: {function_call.arguments}")
- print(f"Result: {function_call.result}")
+def pre_hook(fc: FunctionCall):
+ print(f"Pre-hook: {fc.function.name}")
+ print(f"Arguments: {fc.arguments}")
+ print(f"Result: {fc.result}")
-def post_hook(function_call: FunctionCall):
- print(f"Post-hook: {function_call.function.name}")
- print(f"Arguments: {function_call.arguments}")
- print(f"Result: {function_call.result}")
+def post_hook(fc: FunctionCall):
+ print(f"Post-hook: {fc.function.name}")
+ print(f"Arguments: {fc.arguments}")
+ print(f"Result: {fc.result}")
@tool(pre_hook=pre_hook, post_hook=post_hook)
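
As context for the rename, a minimal sketch of how these hooks attach to a tool via the `@tool` decorator shown above (the `get_time` tool is hypothetical):

from phi.tools import tool, FunctionCall

def pre_hook(fc: FunctionCall):
    # Inspect the call before the tool executes.
    print(f"Pre-hook: {fc.function.name}")

def post_hook(fc: FunctionCall):
    # Inspect the call after the tool executes.
    print(f"Post-hook: {fc.function.name}")

@tool(pre_hook=pre_hook, post_hook=post_hook)
def get_time() -> str:
    """Hypothetical tool: return a fixed timestamp."""
    return "2024-12-10T15:00:00Z"
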
From ba59bd457434d306fcf87a419b4e6e057b9c2717 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 15:07:13 +0000
Subject: [PATCH 11/16] v2.6.6
---
phi/agent/agent.py | 2 +-
phi/model/__init__.py | 1 -
pyproject.toml | 2 +-
3 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/phi/agent/agent.py b/phi/agent/agent.py
index 9cba61c559..21b456fde4 100644
--- a/phi/agent/agent.py
+++ b/phi/agent/agent.py
@@ -31,7 +31,7 @@
from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction
from phi.run.response import RunEvent, RunResponse, RunResponseExtraData
from phi.knowledge.agent import AgentKnowledge
-from phi.model import Model
+from phi.model.base import Model
from phi.model.message import Message, MessageReferences
from phi.model.response import ModelResponse, ModelResponseEvent
from phi.memory.agent import AgentMemory, MemoryRetrieval, Memory, AgentRun, SessionSummary # noqa: F401
diff --git a/phi/model/__init__.py b/phi/model/__init__.py
index 00c37db694..e69de29bb2 100644
--- a/phi/model/__init__.py
+++ b/phi/model/__init__.py
@@ -1 +0,0 @@
-from phi.model.base import Model
diff --git a/pyproject.toml b/pyproject.toml
index fa76853404..b3c1fa0a87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "phidata"
-version = "2.6.5"
+version = "2.6.6"
description = "Build multi-modal Agents with memory, knowledge and tools."
requires-python = ">=3.7"
readme = "README.md"
From 795ee52caab25acae9cd8b7ca3499dfd3a1cf738 Mon Sep 17 00:00:00 2001
From: Dirk Brand
Date: Tue, 10 Dec 2024 19:05:28 +0200
Subject: [PATCH 12/16] Update lumalabs to work with new interface
---
cookbook/tools/lumalabs_tool.py | 11 +++++------
phi/tools/lumalab.py | 20 +++++++++-----------
2 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py
index 43fe5c6f2a..8d87d31f13 100644
--- a/cookbook/tools/lumalabs_tool.py
+++ b/cookbook/tools/lumalabs_tool.py
@@ -1,6 +1,6 @@
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
-from phi.tools.lumalab import LumaLabToolkit
+from phi.tools.lumalab import LumaLabTools
"""Create an agent specialized for Luma AI video generation"""
@@ -8,7 +8,7 @@
name="Luma Video Agent",
agent_id="luma-video-agent",
llm=OpenAIChat(model="gpt-4o"),
- tools=[LumaLabToolkit()], # Using the LumaLab tool we created
+ tools=[LumaLabTools()], # Using the LumaLab tool we created
markdown=True,
debug_mode=True,
show_tool_calls=True,
@@ -23,11 +23,10 @@
" - Required parameters: prompt, start_image_url",
" - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
" - The image URLs must be publicly accessible",
- "After generating any video:",
- "- Display the video URL in markdown format",
- "- If generation is async (wait_for_completion=False), inform about the generation ID",
- "- Clearly communicate any errors that occur",
"Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
+ "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
+ "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
+ "After generating any video, if generation is async (wait_for_completion=False), inform about the generation ID",
],
system_message=(
"Use generate_video for text-to-video requests and image_to_video for image-based "
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
index 32723007de..bebb2b652d 100644
--- a/phi/tools/lumalab.py
+++ b/phi/tools/lumalab.py
@@ -1,4 +1,5 @@
import time
+import uuid
from os import getenv
from typing import Optional, Dict, Any, Literal, TypedDict
@@ -22,7 +23,7 @@ class KeyframeImage(TypedDict):
Keyframes = Dict[str, KeyframeImage]
-class LumaLabToolkit(Toolkit):
+class LumaLabTools(Toolkit):
def __init__(
self,
api_key: Optional[str] = None,
@@ -83,11 +84,10 @@ def image_to_video(
keyframes=keyframes, # type: ignore
)
+ video_id = str(uuid.uuid4())
+
if not self.wait_for_completion:
- if generation and generation.id:
- agent.add_video(Video(id=generation.id, url=None))
- return f"Video generation started with ID: {generation.id}"
- return "Failed to start video generation: No generation ID received"
+ return "Async generation unsupported"
# Poll for completion
seconds_waited = 0
@@ -100,7 +100,7 @@ def image_to_video(
if generation.state == "completed" and generation.assets:
video_url = generation.assets.video
if video_url:
- agent.add_video(Video(id=generation.id, url=video_url, eta="completed"))
+ agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
return f"Video generated successfully: {video_url}"
elif generation.state == "failed":
return f"Generation failed: {generation.failure_reason}"
@@ -137,11 +137,9 @@ def generate_video(
generation = self.client.generations.create(**generation_params) # type: ignore
+ video_id = str(uuid.uuid4())
if not self.wait_for_completion:
- if generation and generation.id:
- agent.add_video(Video(id=generation.id, url=None))
- return f"Video generation started with ID: {generation.id}"
- return "Failed to start video generation: No generation ID received"
+ return "Async generation unsupported"
# Poll for completion
seconds_waited = 0
@@ -154,7 +152,7 @@ def generate_video(
if generation.state == "completed" and generation.assets:
video_url = generation.assets.video
if video_url:
- agent.add_video(Video(id=generation.id, url=video_url, state="completed"))
+ agent.add_video(Video(id=video_id, url=video_url, state="completed"))
return f"Video generated successfully: {video_url}"
elif generation.state == "failed":
return f"Generation failed: {generation.failure_reason}"
From 33715d3cfbc75c06e30815331ce011558c0c03b7 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 22:42:43 +0000
Subject: [PATCH 13/16] README
---
README.md | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 5f70c90895..9d87332bb2 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning.
pip install -U phidata
```
-## Key Features
+## Examples & Key Features
- [Simple & Elegant](#simple--elegant)
- [Powerful & Flexible](#powerful--flexible)
@@ -46,7 +46,9 @@ pip install -U phidata
## Simple & Elegant
-Phidata Agents are simple and elegant, resulting in minimal, beautiful code. For example, you can create a web search agent using 10 lines of code, create a file `web_search.py`
+Phidata Agents are simple and elegant, resulting in minimal, beautiful code.
+
+For example, you can create a web search agent in 10 lines of code. Create a file `web_search.py`:
```python
from phi.agent import Agent
@@ -75,7 +77,9 @@ python web_search.py
## Powerful & Flexible
-Phidata agents can use multiple tools and follow instructions to achieve complex tasks. For example, you can create a finance agent that can query financial data, create a file `finance_agent.py`
+Phidata agents can use multiple tools and follow instructions to achieve complex tasks.
+
+For example, you can create a finance agent with tools to query financial data. Create a file `finance_agent.py`:
```python
from phi.agent import Agent
@@ -103,7 +107,9 @@ python finance_agent.py
## Multi-Modal by default
-Phidata agents support text, images, audio and video. For example, you can create an image agent that can understand images and make tool calls as needed, create a file `image_agent.py`
+Phidata agents support text, images, audio and video.
+
+For example, you can create an image agent that can understand images and make tool calls as needed. Create a file `image_agent.py`:
```python
from phi.agent import Agent
From faf8f35c15e67db8055afca3d7ae4dead93c1ce8 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 23:06:30 +0000
Subject: [PATCH 14/16] README
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 9d87332bb2..367f117df7 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning.
pip install -U phidata
```
-## Examples & Key Features
+## Key Features
- [Simple & Elegant](#simple--elegant)
- [Powerful & Flexible](#powerful--flexible)
From 78cbcbf43e113b2ca4ffa6b30f4065a45ee661c5 Mon Sep 17 00:00:00 2001
From: Ashpreet Bedi
Date: Tue, 10 Dec 2024 23:22:42 +0000
Subject: [PATCH 15/16] README
---
README.md | 15 ++++---
cookbook/agents/43_research_agent_exa.py | 55 ++++++++++++++++++++++++
2 files changed, 63 insertions(+), 7 deletions(-)
create mode 100644 cookbook/agents/43_research_agent_exa.py
diff --git a/README.md b/README.md
index 367f117df7..569b649211 100644
--- a/README.md
+++ b/README.md
@@ -225,14 +225,18 @@ if __name__ == "__main__":
serve_playground_app("playground:app", reload=True)
```
-Authenticate with phidata:
-```
+Authenticate with phidata by running the following command:
+
+```shell
phi auth
```
-> [!NOTE]
-> If `phi auth` fails, you can set the `PHI_API_KEY` environment variable by copying it from [phidata.app](https://www.phidata.app)
+or by exporting the `PHI_API_KEY` for your workspace from [phidata.app](https://www.phidata.app)
+
+```bash
+export PHI_API_KEY=phi-***
+```
Install dependencies and run the Agent Playground:
@@ -383,9 +387,6 @@ python reasoning_agent.py
>
> It is an experiment fueled by curiosity, combining COT and tool use. Set your expectations very low for this initial release. For example: It will not be able to count ‘r’s in ‘strawberry’.
-> [!TIP]
-> If using tools with `reasoning=True`, set `structured_outputs=False` because gpt-4o doesnt support tools with structured outputs.
-
## Demo Agents
The Agent Playground includes a few demo agents that you can test with. If you have recommendations for other demo agents, please let us know in our [community forum](https://community.phidata.com/).
diff --git a/cookbook/agents/43_research_agent_exa.py b/cookbook/agents/43_research_agent_exa.py
new file mode 100644
index 0000000000..4184f5276c
--- /dev/null
+++ b/cookbook/agents/43_research_agent_exa.py
@@ -0,0 +1,55 @@
+"""Please install dependencies using:
+pip install openai exa-py phidata
+"""
+
+from textwrap import dedent
+from datetime import datetime
+
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.exa import ExaTools
+
+agent = Agent(
+ model=OpenAIChat(id="gpt-4o"),
+ tools=[ExaTools(start_published_date=datetime.now().strftime("%Y-%m-%d"), type="keyword")],
+ description="You are an advanced AI researcher writing a report on a topic.",
+ instructions=[
+ "For the provided topic, run 3 different searches.",
+ "Read the results carefully and prepare a NYT worthy report.",
+ "Focus on facts and make sure to provide references.",
+ ],
+ expected_output=dedent("""\
+ An engaging, informative, and well-structured report in markdown format:
+
+ ## Engaging Report Title
+
+ ### Overview
+ {give a brief introduction of the report and why the user should read this report}
+ {make this section engaging and create a hook for the reader}
+
+ ### Section 1
+ {break the report into sections}
+ {provide details/facts/processes in this section}
+
+ ... more sections as necessary...
+
+ ### Takeaways
+ {provide key takeaways from the article}
+
+ ### References
+ - [Reference 1](link)
+ - [Reference 2](link)
+ - [Reference 3](link)
+
+ ### About the Author
+ {write a made up bio for yourself, give yourself a cyberpunk name and a title}
+
+ - published on {date} in dd/mm/yyyy format
+ """),
+ markdown=True,
+ show_tool_calls=True,
+ add_datetime_to_instructions=True,
+ save_response_to_file="tmp/{message}.md",
+ # debug_mode=True,
+)
+agent.print_response("Simulation theory", stream=True)
From d10bc9cc9166248ff910a48ab7e233c7be8a6a52 Mon Sep 17 00:00:00 2001
From: Dirk Brand
Date: Wed, 11 Dec 2024 12:42:51 +0200
Subject: [PATCH 16/16] Fix style
---
cookbook/playground/multimodal_agent.py | 9 ++++-----
phi/llm/openai/chat.py | 4 +++-
phi/model/openai/chat.py | 4 +++-
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
index 104177972e..25f0405684 100644
--- a/cookbook/playground/multimodal_agent.py
+++ b/cookbook/playground/multimodal_agent.py
@@ -14,7 +14,6 @@
from phi.playground import Playground, serve_playground_app
from phi.storage.agent.sqlite import SqlAgentStorage
from phi.tools.fal_tools import FalTools
-from pydantic import BaseModel, Field
image_agent_storage_file: str = "tmp/image_agent.db"
@@ -26,7 +25,7 @@
description="You are an AI agent that can generate images using DALL-E.",
instructions=[
"When the user asks you to create an image, use the `create_image` tool to create the image.",
- "Don't provide the URL of the image in the response. Only describe what image was generated."
+ "Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -43,7 +42,7 @@
description="You are an AI agent that can generate gifs using the ModelsLabs API.",
instructions=[
"When the user asks you to create an image, use the `generate_media` tool to create the image.",
- "Don't provide the URL of the image in the response. Only describe what image was generated."
+ "Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -60,7 +59,7 @@
description="You are an AI agent that can generate videos using the ModelsLabs API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
- "Don't provide the URL of the video in the response. Only describe what video was generated."
+ "Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
@@ -77,7 +76,7 @@
description="You are an AI agent that can generate videos using the Fal API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
- "Don't provide the URL of the video in the response. Only describe what video was generated."
+ "Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py
index 60b3fe2e3c..666313522d 100644
--- a/phi/llm/openai/chat.py
+++ b/phi/llm/openai/chat.py
@@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty:
_dict["presence_penalty"] = self.presence_penalty
if self.response_format:
- _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ _dict["response_format"] = (
+ self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ )
if self.seed is not None:
_dict["seed"] = self.seed
if self.stop:
diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py
index 66dbf6242f..ef177512c8 100644
--- a/phi/model/openai/chat.py
+++ b/phi/model/openai/chat.py
@@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty is not None:
model_dict["presence_penalty"] = self.presence_penalty
if self.response_format is not None:
- model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ model_dict["response_format"] = (
+ self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ )
if self.seed is not None:
model_dict["seed"] = self.seed
if self.stop is not None:
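
For context on the reformatted line: `response_format` is either a plain dict (e.g. OpenAI's `{"type": "json_object"}`) or a response-model class, and `to_dict` keeps the former as-is while stringifying the latter. A hedged standalone sketch of that serialization (the function name is illustrative):

from typing import Any

def serialize_response_format(response_format: Any) -> Any:
    # Dicts pass through untouched; anything else (e.g. a model class
    # used for structured outputs) is stringified so the dict stays JSON-safe.
    return response_format if isinstance(response_format, dict) else str(response_format)
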