Commit
Merge branch 'release/2.7.0' of https://github.com/phidatahq/phidata into release/2.7.0

dirkvolter committed Dec 11, 2024
2 parents 09dd849 + 1c3d341 commit 629b5de
Showing 10 changed files with 312 additions and 31 deletions.
39 changes: 25 additions & 14 deletions README.md
@@ -9,25 +9,29 @@
</p>

<h3 align="center">
-Build multi-modal Agents with memory, knowledge, tools and reasoning
+Build multi-modal Agents with memory, knowledge, tools and reasoning.
</h3>

<img
src="https://github.com/user-attachments/assets/44739d09-2ec4-49b7-bea1-b275afccc592"
style="border-radius: 8px;"
/>

-# What is phidata?
+## What is phidata?

-**Phidata is a framework for building multi-modal agents with memory, knowledge, tools and reasoning.**
+**Phidata is a framework for building multi-modal agents.** Use phidata to:
+
+- **Build multi-modal agents with memory, knowledge, tools and reasoning.**
+- **Build teams of agents that can work together to solve problems.**
+- **Chat with your agents using a beautiful Agent UI.**

## Install

```shell
pip install -U phidata
```

-# Key Features
+## Key Features

- [Simple & Elegant](#simple--elegant)
- [Powerful & Flexible](#powerful--flexible)
@@ -42,7 +46,9 @@ pip install -U phidata

## Simple & Elegant

-Phidata Agents are simple and elegant, resulting in minimal, beautiful code. For example, you can create a web search agent using 10 lines of code, create a file `web_search.py`
+Phidata Agents are simple and elegant, resulting in minimal, beautiful code.
+
+For example, you can create a web search agent in 10 lines of code. Create a file `web_search.py`:

```python
from phi.agent import Agent
@@ -71,7 +77,9 @@ python web_search.py

## Powerful & Flexible

-Phidata agents can use multiple tools and follow instructions to achieve complex tasks. For example, you can create a finance agent that can query financial data, create a file `finance_agent.py`
+Phidata agents can use multiple tools and follow instructions to achieve complex tasks.
+
+For example, you can create a finance agent with tools to query financial data. Create a file `finance_agent.py`:

```python
from phi.agent import Agent
@@ -99,7 +107,9 @@ python finance_agent.py

## Multi-Modal by default

-Phidata agents support text, images, audio and video. For example, you can create an image agent that can understand images and make tool calls as needed, create a file `image_agent.py`
+Phidata agents support text, images, audio and video.
+
+For example, you can create an image agent that can understand images and make tool calls as needed. Create a file `image_agent.py`:

```python
from phi.agent import Agent
@@ -215,14 +225,18 @@ if __name__ == "__main__":
serve_playground_app("playground:app", reload=True)
```

-Authenticate with phidata:
-
-```
+Authenticate with phidata by running the following command:
+
+```shell
phi auth
```

> [!NOTE]
> If `phi auth` fails, you can set the `PHI_API_KEY` environment variable by copying it from [phidata.app](https://www.phidata.app),
> or by exporting the `PHI_API_KEY` for your workspace from [phidata.app](https://www.phidata.app)

```bash
export PHI_API_KEY=phi-***
```

Install dependencies and run the Agent Playground:

@@ -373,9 +387,6 @@ python reasoning_agent.py
>
> It is an experiment fueled by curiosity, combining CoT and tool use. Set your expectations very low for this initial release. For example: It will not be able to count ‘r’s in ‘strawberry’.

> [!TIP]
> If using tools with `reasoning=True`, set `structured_outputs=False` because gpt-4o doesn't support tools with structured outputs.

## Demo Agents

The Agent Playground includes a few demo agents that you can test with. If you have recommendations for other demo agents, please let us know in our [community forum](https://community.phidata.com/).
16 changes: 8 additions & 8 deletions cookbook/agents/30_pre_and_post_hooks.py
@@ -6,16 +6,16 @@
from phi.tools import tool, FunctionCall


-def pre_hook(function_call: FunctionCall):
-    print(f"Pre-hook: {function_call.function.name}")
-    print(f"Arguments: {function_call.arguments}")
-    print(f"Result: {function_call.result}")
+def pre_hook(fc: FunctionCall):
+    print(f"Pre-hook: {fc.function.name}")
+    print(f"Arguments: {fc.arguments}")
+    print(f"Result: {fc.result}")


-def post_hook(function_call: FunctionCall):
-    print(f"Post-hook: {function_call.function.name}")
-    print(f"Arguments: {function_call.arguments}")
-    print(f"Result: {function_call.result}")
+def post_hook(fc: FunctionCall):
+    print(f"Post-hook: {fc.function.name}")
+    print(f"Arguments: {fc.arguments}")
+    print(f"Result: {fc.result}")


@tool(pre_hook=pre_hook, post_hook=post_hook)
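The hook rename above keeps the same ordering semantics: the pre-hook fires before the tool runs (so `result` is still `None`), and the post-hook fires after. A minimal, pure-Python sketch of that ordering — `FunctionCall` and `run_with_hooks` here are stand-ins for illustration, not phidata's actual implementation:

```python
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional


@dataclass
class FunctionCall:
    # Stand-in for phi.tools.FunctionCall: name, arguments, and the
    # result, which is only populated once the wrapped function runs.
    name: str
    arguments: Dict[str, Any]
    result: Any = None


def run_with_hooks(
    fn: Callable[..., Any],
    arguments: Dict[str, Any],
    pre_hook: Optional[Callable[[FunctionCall], None]] = None,
    post_hook: Optional[Callable[[FunctionCall], None]] = None,
) -> Any:
    fc = FunctionCall(name=fn.__name__, arguments=arguments)
    if pre_hook:
        pre_hook(fc)  # runs before the call: fc.result is still None
    fc.result = fn(**arguments)
    if post_hook:
        post_hook(fc)  # runs after the call: fc.result is populated
    return fc.result
```

This makes explicit why a pre-hook that prints `fc.result` would show `None` while the post-hook shows the actual return value.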
55 changes: 55 additions & 0 deletions cookbook/agents/43_research_agent_exa.py
@@ -0,0 +1,55 @@
"""Please install dependencies using:
pip install openai exa-py phidata
"""

from textwrap import dedent
from datetime import datetime

from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.exa import ExaTools

agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    tools=[ExaTools(start_published_date=datetime.now().strftime("%Y-%m-%d"), type="keyword")],
    description="You are an advanced AI researcher writing a report on a topic.",
    instructions=[
        "For the provided topic, run 3 different searches.",
        "Read the results carefully and prepare a NYT worthy report.",
        "Focus on facts and make sure to provide references.",
    ],
    expected_output=dedent("""\
        An engaging, informative, and well-structured report in markdown format:
        ## Engaging Report Title
        ### Overview
        {give a brief introduction of the report and why the user should read this report}
        {make this section engaging and create a hook for the reader}
        ### Section 1
        {break the report into sections}
        {provide details/facts/processes in this section}
        ... more sections as necessary...
        ### Takeaways
        {provide key takeaways from the article}
        ### References
        - [Reference 1](link)
        - [Reference 2](link)
        - [Reference 3](link)
        ### About the Author
        {write a made up bio for yourself, give yourself a cyberpunk name and a title}
        - published on {date} in dd/mm/yyyy
        """),
    markdown=True,
    show_tool_calls=True,
    add_datetime_to_instructions=True,
    save_response_to_file="tmp/{message}.md",
    # debug_mode=True,
)
agent.print_response("Simulation theory", stream=True)
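The `save_response_to_file="tmp/{message}.md"` setting implies the prompt is substituted into the save path. A small illustrative sketch of that substitution — `response_path` and its sanitizing rule are hypothetical helpers, not phidata's implementation:

```python
def response_path(template: str, message: str) -> str:
    # Substitute the prompt into a "tmp/{message}.md"-style template,
    # replacing characters that are unsafe in filenames with "_".
    safe = "".join(c if c.isalnum() or c in " -_" else "_" for c in message)
    return template.format(message=safe.strip())


print(response_path("tmp/{message}.md", "Simulation theory"))
# -> tmp/Simulation theory.md
```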
9 changes: 4 additions & 5 deletions cookbook/playground/multimodal_agent.py
@@ -14,7 +14,6 @@
from phi.playground import Playground, serve_playground_app
from phi.storage.agent.sqlite import SqlAgentStorage
from phi.tools.fal_tools import FalTools
-from pydantic import BaseModel, Field

image_agent_storage_file: str = "tmp/image_agent.db"

@@ -26,7 +25,7 @@
description="You are an AI agent that can generate images using DALL-E.",
instructions=[
"When the user asks you to create an image, use the `create_image` tool to create the image.",
-"Don't provide the URL of the image in the response. Only describe what image was generated."
+"Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -43,7 +42,7 @@
description="You are an AI agent that can generate gifs using the ModelsLabs API.",
instructions=[
"When the user asks you to create an image, use the `generate_media` tool to create the image.",
-"Don't provide the URL of the image in the response. Only describe what image was generated."
+"Don't provide the URL of the image in the response. Only describe what image was generated.",
],
markdown=True,
debug_mode=True,
@@ -60,7 +59,7 @@
description="You are an AI agent that can generate videos using the ModelsLabs API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
-"Don't provide the URL of the video in the response. Only describe what video was generated."
+"Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
@@ -77,7 +76,7 @@
description="You are an AI agent that can generate videos using the Fal API.",
instructions=[
"When the user asks you to create a video, use the `generate_media` tool to create the video.",
-"Don't provide the URL of the video in the response. Only describe what video was generated."
+"Don't provide the URL of the video in the response. Only describe what video was generated.",
],
markdown=True,
debug_mode=True,
45 changes: 45 additions & 0 deletions cookbook/tools/lumalabs_tool.py
@@ -0,0 +1,45 @@
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

"""Create an agent specialized for Luma AI video generation"""

luma_agent = Agent(
    name="Luma Video Agent",
    agent_id="luma-video-agent",
    llm=OpenAIChat(model="gpt-4o"),
    tools=[LumaLabTools()],  # Using the LumaLab tool we created
    markdown=True,
    debug_mode=True,
    show_tool_calls=True,
    instructions=[
        "You are an agent designed to generate videos using the Luma AI API.",
        "You can generate videos in two ways:",
        "1. Text-to-Video Generation:",
        "   - Use the generate_video function for creating videos from text prompts",
        "   - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
        "2. Image-to-Video Generation:",
        "   - Use the image_to_video function when starting from one or two images",
        "   - Required parameters: prompt, start_image_url",
        "   - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
        "   - The image URLs must be publicly accessible",
        "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
        "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
        "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as it's ready.",
        "After generating any video, if generation is async (wait_for_completion=False), inform the user of the generation ID.",
    ],
    system_message=(
        "Use generate_video for text-to-video requests and image_to_video for image-based "
        "generation. Don't modify default parameters unless specifically requested. "
        "Always provide clear feedback about the video generation status."
    ),
)

luma_agent.run("Generate a video of a car in a sky")
# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
# luma_agent.run("""
# Create a transition video between these two images:
# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
# Make it a smooth, natural movement
# """)
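The instruction list above encodes a simple dispatch rule: start from images when image URLs are given, otherwise fall back to text-to-video. A hypothetical helper (not part of `LumaLabTools`) making that rule concrete:

```python
from typing import Optional


def choose_generation_mode(
    prompt: str,
    start_image_url: Optional[str] = None,
    end_image_url: Optional[str] = None,
) -> str:
    # Mirrors the agent's instructions: image_to_video when starting
    # from one or two images, generate_video for plain text prompts.
    if start_image_url is not None:
        return "image_to_video"
    if end_image_url is not None:
        raise ValueError("end_image_url requires start_image_url")
    return "generate_video"
```

For the three example prompts at the end of the file, only the tiger and car-transition runs would hit the `image_to_video` branch.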
2 changes: 1 addition & 1 deletion phi/agent/agent.py
@@ -32,7 +32,7 @@
from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction
from phi.run.response import RunEvent, RunResponse, RunResponseExtraData
from phi.knowledge.agent import AgentKnowledge
-from phi.model import Model
+from phi.model.base import Model
from phi.model.message import Message, MessageReferences
from phi.model.response import ModelResponse, ModelResponseEvent
from phi.memory.agent import AgentMemory, MemoryRetrieval, Memory, AgentRun, SessionSummary # noqa: F401
4 changes: 3 additions & 1 deletion phi/llm/openai/chat.py
@@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]:
        if self.presence_penalty:
            _dict["presence_penalty"] = self.presence_penalty
        if self.response_format:
-            _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            _dict["response_format"] = (
+                self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            )
        if self.seed is not None:
            _dict["seed"] = self.seed
        if self.stop:
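The rewrapped expression is a formatting-only change; the underlying rule is: pass `response_format` through when it is already a dict, otherwise stringify it (for example, a Pydantic model class). Extracted as a standalone sketch — `serialize_response_format` is an illustrative name, not a phidata function:

```python
from typing import Any


def serialize_response_format(response_format: Any) -> Any:
    # Dicts like {"type": "json_object"} pass through unchanged;
    # anything else (e.g. a Pydantic model class) becomes its str().
    if isinstance(response_format, dict):
        return response_format
    return str(response_format)
```

The same rule appears in both `phi/llm/openai/chat.py` and `phi/model/openai/chat.py` below.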
1 change: 0 additions & 1 deletion phi/model/__init__.py
@@ -1 +0,0 @@
-from phi.model.base import Model
4 changes: 3 additions & 1 deletion phi/model/openai/chat.py
@@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]:
        if self.presence_penalty is not None:
            model_dict["presence_penalty"] = self.presence_penalty
        if self.response_format is not None:
-            model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            model_dict["response_format"] = (
+                self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            )
        if self.seed is not None:
            model_dict["seed"] = self.seed
        if self.stop is not None: