diff --git a/README.md b/README.md
index 50a5b995be..569b649211 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-Build multi-modal Agents with memory, knowledge, tools and reasoning
+Build multi-modal Agents with memory, knowledge, tools and reasoning.
-# What is phidata?
+## What is phidata?
-**Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.**
+**Phidata is a framework for building multi-modal agents.** Use phidata to:
+
+- **Build multi-modal agents with memory, knowledge, tools and reasoning.**
+- **Build teams of agents that can work together to solve problems.**
+- **Chat with your agents using a beautiful Agent UI.**
## Install
@@ -27,7 +31,7 @@ Build multi-modal Agents with memory, knowledge, tools and reasoning
pip install -U phidata
```
-# Key Features
+## Key Features
- [Simple & Elegant](#simple--elegant)
- [Powerful & Flexible](#powerful--flexible)
@@ -42,7 +46,9 @@ pip install -U phidata
## Simple & Elegant
-Phidata Agents are simple and elegant, resulting in minimal, beautiful code. For example, you can create a web search agent using 10 lines of code, create a file `web_search.py`
+Phidata Agents are simple and elegant, resulting in minimal, beautiful code.
+
+For example, you can create a web search agent in 10 lines of code. Create a file `web_search.py`:
```python
from phi.agent import Agent
@@ -71,7 +77,9 @@ python web_search.py
## Powerful & Flexible
-Phidata agents can use multiple tools and follow instructions to achieve complex tasks. For example, you can create a finance agent that can query financial data, create a file `finance_agent.py`
+Phidata agents can use multiple tools and follow instructions to achieve complex tasks.
+
+For example, you can create a finance agent with tools to query financial data. Create a file `finance_agent.py`:
```python
from phi.agent import Agent
@@ -99,7 +107,9 @@ python finance_agent.py
## Multi-Modal by default
-Phidata agents support text, images, audio and video. For example, you can create an image agent that can understand images and make tool calls as needed, create a file `image_agent.py`
+Phidata agents support text, images, audio and video.
+
+For example, you can create an image agent that can understand images and make tool calls as needed. Create a file `image_agent.py`:
```python
from phi.agent import Agent
@@ -215,14 +225,18 @@ if __name__ == "__main__":
serve_playground_app("playground:app", reload=True)
```
-Authenticate with phidata:
-```
+Authenticate with phidata by running the following command:
+
+```shell
phi auth
```
-> [!NOTE]
-> If `phi auth` fails, you can set the `PHI_API_KEY` environment variable by copying it from [phidata.app](https://www.phidata.app)
+or by exporting the `PHI_API_KEY` for your workspace from [phidata.app](https://www.phidata.app):
+
+```shell
+export PHI_API_KEY=phi-***
+```
Install dependencies and run the Agent Playground:
@@ -373,9 +387,6 @@ python reasoning_agent.py
>
> It is an experiment fueled by curiosity, combining COT and tool use. Set your expectations very low for this initial release. For example: It will not be able to count ‘r’s in ‘strawberry’.
-> [!TIP]
-> If using tools with `reasoning=True`, set `structured_outputs=False` because gpt-4o doesnt support tools with structured outputs.
-
## Demo Agents
The Agent Playground includes a few demo agents that you can test with. If you have recommendations for other demo agents, please let us know in our [community forum](https://community.phidata.com/).
diff --git a/cookbook/agents/30_pre_and_post_hooks.py b/cookbook/agents/30_pre_and_post_hooks.py
index 900387b8d1..97a8bcab0b 100644
--- a/cookbook/agents/30_pre_and_post_hooks.py
+++ b/cookbook/agents/30_pre_and_post_hooks.py
@@ -6,16 +6,16 @@
from phi.tools import tool, FunctionCall
-def pre_hook(function_call: FunctionCall):
- print(f"Pre-hook: {function_call.function.name}")
- print(f"Arguments: {function_call.arguments}")
- print(f"Result: {function_call.result}")
+def pre_hook(fc: FunctionCall):
+ print(f"Pre-hook: {fc.function.name}")
+ print(f"Arguments: {fc.arguments}")
+ print(f"Result: {fc.result}")
-def post_hook(function_call: FunctionCall):
- print(f"Post-hook: {function_call.function.name}")
- print(f"Arguments: {function_call.arguments}")
- print(f"Result: {function_call.result}")
+def post_hook(fc: FunctionCall):
+ print(f"Post-hook: {fc.function.name}")
+ print(f"Arguments: {fc.arguments}")
+ print(f"Result: {fc.result}")
@tool(pre_hook=pre_hook, post_hook=post_hook)
diff --git a/cookbook/agents/43_research_agent_exa.py b/cookbook/agents/43_research_agent_exa.py
new file mode 100644
index 0000000000..4184f5276c
--- /dev/null
+++ b/cookbook/agents/43_research_agent_exa.py
@@ -0,0 +1,55 @@
+"""Please install dependencies using:
+pip install openai exa-py phidata
+"""
+
+from textwrap import dedent
+from datetime import datetime
+
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.exa import ExaTools
+
+agent = Agent(
+ model=OpenAIChat(id="gpt-4o"),
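+    # Assumption about ExaTools: start_published_date and type are passed through to
+    # the Exa search API, so setting start_published_date to today limits results to
+    # pages published today, and type="keyword" selects Exa's keyword (non-neural) search.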
+ tools=[ExaTools(start_published_date=datetime.now().strftime("%Y-%m-%d"), type="keyword")],
+ description="You are an advanced AI researcher writing a report on a topic.",
+ instructions=[
+ "For the provided topic, run 3 different searches.",
+ "Read the results carefully and prepare a NYT worthy report.",
+ "Focus on facts and make sure to provide references.",
+ ],
+ expected_output=dedent("""\
+ An engaging, informative, and well-structured report in markdown format:
+
+ ## Engaging Report Title
+
+ ### Overview
+ {give a brief introduction of the report and why the user should read this report}
+ {make this section engaging and create a hook for the reader}
+
+ ### Section 1
+ {break the report into sections}
+ {provide details/facts/processes in this section}
+
+ ... more sections as necessary...
+
+ ### Takeaways
+ {provide key takeaways from the article}
+
+ ### References
+ - [Reference 1](link)
+ - [Reference 2](link)
+ - [Reference 3](link)
+
+ ### About the Author
+    {write a made-up bio for yourself, give yourself a cyberpunk name and a title}
+
+    - published on {date} in dd/mm/yyyy format
+ """),
+ markdown=True,
+ show_tool_calls=True,
+ add_datetime_to_instructions=True,
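+    # Assumption: {message} in the file path is replaced with the input message at runtime.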
+ save_response_to_file="tmp/{message}.md",
+ # debug_mode=True,
+)
+agent.print_response("Simulation theory", stream=True)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
index 104177972e..25f0405684 100644
--- a/cookbook/playground/multimodal_agent.py
+++ b/cookbook/playground/multimodal_agent.py
@@ -14,7 +14,6 @@
from phi.playground import Playground, serve_playground_app
from phi.storage.agent.sqlite import SqlAgentStorage
from phi.tools.fal_tools import FalTools
-from pydantic import BaseModel, Field
image_agent_storage_file: str = "tmp/image_agent.db"
@@ -26,7 +25,7 @@
description="You are an AI agent that can generate images using DALL-E.",
instructions=[
"When the user asks you to create an image, use the `create_image` tool to create the image.",
- "Don't provide the URL of the image in the response. Only describe what image was generated."
+ "Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -43,7 +42,7 @@
description="You are an AI agent that can generate gifs using the ModelsLabs API.",
instructions=[
"When the user asks you to create an image, use the `generate_media` tool to create the image.",
- "Don't provide the URL of the image in the response. Only describe what image was generated."
+ "Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -60,7 +59,7 @@
description="You are an AI agent that can generate videos using the ModelsLabs API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
- "Don't provide the URL of the video in the response. Only describe what video was generated."
+ "Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
@@ -77,7 +76,7 @@
description="You are an AI agent that can generate videos using the Fal API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
- "Don't provide the URL of the video in the response. Only describe what video was generated."
+ "Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py
new file mode 100644
index 0000000000..8d87d31f13
--- /dev/null
+++ b/cookbook/tools/lumalabs_tool.py
@@ -0,0 +1,45 @@
+"""Create an agent specialized for Luma AI video generation."""
+
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.lumalab import LumaLabTools
+
+luma_agent = Agent(
+    name="Luma Video Agent",
+    agent_id="luma-video-agent",
+    model=OpenAIChat(id="gpt-4o"),
+ tools=[LumaLabTools()], # Using the LumaLab tool we created
+ markdown=True,
+ debug_mode=True,
+ show_tool_calls=True,
+ instructions=[
+ "You are an agent designed to generate videos using the Luma AI API.",
+ "You can generate videos in two ways:",
+ "1. Text-to-Video Generation:",
+ " - Use the generate_video function for creating videos from text prompts",
+ " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
+ "2. Image-to-Video Generation:",
+ " - Use the image_to_video function when starting from one or two images",
+ " - Required parameters: prompt, start_image_url",
+ " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
+ " - The image URLs must be publicly accessible",
+ "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
+ "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
+ "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
+ "After generating any video, if generation is async (wait_for_completion=False), inform about the generation ID",
+ ],
+ system_message=(
+ "Use generate_video for text-to-video requests and image_to_video for image-based "
+ "generation. Don't modify default parameters unless specifically requested. "
+ "Always provide clear feedback about the video generation status."
+ ),
+)
+
+luma_agent.run("Generate a video of a car in a sky")
+# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
+# luma_agent.run("""
+# Create a transition video between these two images:
+# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
+# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
+# Make it a smooth, natural movement
+# """)
diff --git a/phi/agent/agent.py b/phi/agent/agent.py
index 0acae861bc..16715a79b6 100644
--- a/phi/agent/agent.py
+++ b/phi/agent/agent.py
@@ -32,7 +32,7 @@
from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction
from phi.run.response import RunEvent, RunResponse, RunResponseExtraData
from phi.knowledge.agent import AgentKnowledge
-from phi.model import Model
+from phi.model.base import Model
from phi.model.message import Message, MessageReferences
from phi.model.response import ModelResponse, ModelResponseEvent
from phi.memory.agent import AgentMemory, MemoryRetrieval, Memory, AgentRun, SessionSummary # noqa: F401
diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py
index 60b3fe2e3c..666313522d 100644
--- a/phi/llm/openai/chat.py
+++ b/phi/llm/openai/chat.py
@@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty:
_dict["presence_penalty"] = self.presence_penalty
if self.response_format:
- _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ _dict["response_format"] = (
+ self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ )
if self.seed is not None:
_dict["seed"] = self.seed
if self.stop:
diff --git a/phi/model/__init__.py b/phi/model/__init__.py
index 00c37db694..e69de29bb2 100644
--- a/phi/model/__init__.py
+++ b/phi/model/__init__.py
@@ -1 +0,0 @@
-from phi.model.base import Model
diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py
index 66dbf6242f..ef177512c8 100644
--- a/phi/model/openai/chat.py
+++ b/phi/model/openai/chat.py
@@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]:
if self.presence_penalty is not None:
model_dict["presence_penalty"] = self.presence_penalty
if self.response_format is not None:
- model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ model_dict["response_format"] = (
+ self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+ )
if self.seed is not None:
model_dict["seed"] = self.seed
if self.stop is not None:
diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py
new file mode 100644
index 0000000000..bebb2b652d
--- /dev/null
+++ b/phi/tools/lumalab.py
@@ -0,0 +1,168 @@
+import time
+import uuid
+from os import getenv
+from typing import Optional, Dict, Any, Literal, TypedDict
+
+from phi.agent import Agent
+from phi.tools import Toolkit
+from phi.utils.log import logger
+from phi.model.content import Video
+
+try:
+ from lumaai import LumaAI # type: ignore
+except ImportError:
+ raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`")
+
+
+# Define types for keyframe structure
+class KeyframeImage(TypedDict):
+ type: Literal["image"]
+ url: str
+
+
+Keyframes = Dict[str, KeyframeImage]
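+# For illustration, a keyframes value has this shape (hypothetical URLs):
+# {
+#     "frame0": {"type": "image", "url": "https://example.com/start.jpg"},
+#     "frame1": {"type": "image", "url": "https://example.com/end.jpg"},
+# }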
+
+
+class LumaLabTools(Toolkit):
+ def __init__(
+ self,
+ api_key: Optional[str] = None,
+ wait_for_completion: bool = True,
+ poll_interval: int = 3,
+ max_wait_time: int = 300, # 5 minutes
+ ):
+ super().__init__(name="luma_lab")
+
+ self.wait_for_completion = wait_for_completion
+ self.poll_interval = poll_interval
+ self.max_wait_time = max_wait_time
+ self.api_key = api_key or getenv("LUMAAI_API_KEY")
+
+ if not self.api_key:
+ logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.")
+
+ self.client = LumaAI(auth_token=self.api_key)
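+        # Toolkit.register exposes these methods to the agent as callable tools.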
+ self.register(self.generate_video)
+ self.register(self.image_to_video)
+
+ def image_to_video(
+ self,
+ agent: Agent,
+ prompt: str,
+ start_image_url: str,
+ end_image_url: Optional[str] = None,
+ loop: bool = False,
+ aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
+ ) -> str:
+ """Generate a video from one or two images with a prompt.
+
+ Args:
+ agent: The agent instance
+ prompt: Text description of the desired video
+ start_image_url: URL of the starting image
+ end_image_url: Optional URL of the ending image
+ loop: Whether the video should loop
+ aspect_ratio: Aspect ratio of the output video
+
+ Returns:
+ str: Status message or error
+ """
+
+ try:
+ # Construct keyframes
+            keyframes: Keyframes = {"frame0": {"type": "image", "url": start_image_url}}
+
+ # Add end image if provided
+ if end_image_url:
+ keyframes["frame1"] = {"type": "image", "url": end_image_url}
+
+ # Create generation with keyframes
+ generation = self.client.generations.create(
+ prompt=prompt,
+ loop=loop,
+ aspect_ratio=aspect_ratio,
+ keyframes=keyframes, # type: ignore
+ )
+
+ video_id = str(uuid.uuid4())
+
+ if not self.wait_for_completion:
+ return "Async generation unsupported"
+
+ # Poll for completion
+ seconds_waited = 0
+ while seconds_waited < self.max_wait_time:
+ if not generation or not generation.id:
+ return "Failed to get generation ID"
+
+ generation = self.client.generations.get(generation.id)
+
+ if generation.state == "completed" and generation.assets:
+ video_url = generation.assets.video
+ if video_url:
+ agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
+ return f"Video generated successfully: {video_url}"
+ elif generation.state == "failed":
+ return f"Generation failed: {generation.failure_reason}"
+
+ logger.info(f"Generation in progress... State: {generation.state}")
+ time.sleep(self.poll_interval)
+ seconds_waited += self.poll_interval
+
+ return f"Video generation timed out after {self.max_wait_time} seconds"
+
+ except Exception as e:
+ logger.error(f"Failed to generate video: {e}")
+ return f"Error: {e}"
+
+ def generate_video(
+ self,
+ agent: Agent,
+ prompt: str,
+ loop: bool = False,
+ aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
+        keyframes: Optional[Keyframes] = None,
+ ) -> str:
+ """Use this function to generate a video given a prompt."""
+
+ try:
+ generation_params: Dict[str, Any] = {
+ "prompt": prompt,
+ "loop": loop,
+ "aspect_ratio": aspect_ratio,
+ }
+
+ if keyframes is not None:
+ generation_params["keyframes"] = keyframes
+
+ generation = self.client.generations.create(**generation_params) # type: ignore
+
+ video_id = str(uuid.uuid4())
+ if not self.wait_for_completion:
+ return "Async generation unsupported"
+
+ # Poll for completion
+ seconds_waited = 0
+ while seconds_waited < self.max_wait_time:
+ if not generation or not generation.id:
+ return "Failed to get generation ID"
+
+ generation = self.client.generations.get(generation.id)
+
+ if generation.state == "completed" and generation.assets:
+ video_url = generation.assets.video
+ if video_url:
+                        agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
+ return f"Video generated successfully: {video_url}"
+ elif generation.state == "failed":
+ return f"Generation failed: {generation.failure_reason}"
+
+ logger.info(f"Generation in progress... State: {generation.state}")
+ time.sleep(self.poll_interval)
+ seconds_waited += self.poll_interval
+
+ return f"Video generation timed out after {self.max_wait_time} seconds"
+
+ except Exception as e:
+ logger.error(f"Failed to generate video: {e}")
+ return f"Error: {e}"