Skip to content

Commit

Permalink
Improve performance and precision on movies AI
Browse files Browse the repository at this point in the history
  • Loading branch information
fjsj committed Oct 2, 2024
1 parent c98970d commit 854c952
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 25 deletions.
1 change: 1 addition & 0 deletions example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Fill the `.env` file with the necessary API keys. You'll need accounts on:
- [OpenAI](https://platform.openai.com/)
- [Weather API](https://www.weatherapi.com/)
- [Brave Search API](https://brave.com/search/api/)
- [Jina Reader API](https://jina.ai/)

Activate the poetry shell:

Expand Down
1 change: 1 addition & 0 deletions example/example/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@
# Example specific settings:

WEATHER_API_KEY = os.getenv("WEATHER_API_KEY") # get for free at https://www.weatherapi.com/
JINA_API_KEY = os.getenv("JINA_API_KEY") # get for free at https://jina.ai/
BRAVE_SEARCH_API_KEY = os.getenv(
"BRAVE_SEARCH_API_KEY"
) # get for free at https://brave.com/search/api/
Expand Down
85 changes: 60 additions & 25 deletions example/movies/ai_assistants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import threading
import time
from typing import Sequence

from django.conf import settings
Expand All @@ -8,16 +10,30 @@
import requests
from langchain_community.tools import BraveSearch
from langchain_core.tools import BaseTool
from pydantic import BaseModel

from django_ai_assistant import AIAssistant, method_tool
from movies.models import MovieBacklogItem


class IMDbMovie(BaseModel):
imdb_url: str
imdb_rating: float
scrapped_imdb_page_markdown: str
brave_search_lock = threading.Lock()


class RateLimitedBraveSearch(BraveSearch):
    """BraveSearch tool whose calls are serialized through ``brave_search_lock``.

    Each call holds the module-level lock for a little over one second so that
    concurrent tool invocations are spaced out to roughly 1 request/second,
    which is the limit of the Brave Search API free plan.
    """

    def _run(self, query: str, **kwargs) -> str:
        """Use the tool.

        Args:
            query: Search query forwarded to ``self.search_wrapper.run``.
            **kwargs: Accepted for compatibility with the tool interface; unused.

        Returns:
            Whatever ``self.search_wrapper.run(query)`` returns.
        """

        # brave_search_lock is necessary to ensure 1 request/second,
        # due to free plan limitations of Brave Search API.
        #
        # acquire() returns False when the timeout expires. Track that, so we
        # never release a lock this thread does not hold: releasing it anyway
        # would either raise RuntimeError or unlock it under the thread that
        # actually owns it.
        acquired = brave_search_lock.acquire(timeout=10)
        try:
            # monotonic() is immune to system clock adjustments.
            start_time = time.monotonic()
            result = self.search_wrapper.run(query)
            elapsed_time = time.monotonic() - start_time
            # Padding only matters while we hold the lock: it keeps the next
            # waiter from firing before the 1-second window has passed.
            if acquired and elapsed_time < 1:
                time.sleep(1 - elapsed_time + 0.2)  # sleep plus some jitter
            return result
        finally:
            if acquired:
                brave_search_lock.release()


# Note this assistant is not registered, but we'll use it as a tool on the other.
Expand All @@ -28,31 +44,38 @@ class IMDbScraper(AIAssistant):
"You're a function to find the IMDb URL of a given movie, "
"and scrape this URL to get the movie rating and other information.\n"
"Use the search function to find the IMDb URL. "
"Make search queries like: \n"
"- IMDb page of The Matrix\n"
"- IMDb page of The Godfather\n"
"- IMDb page of The Shawshank Redemption\n"
"Then check results, scape the IMDb URL, process the page, and produce a JSON output."
"Make search queries like:\n"
"- IMDb page of <queried movie here>\n"
"Then check results, scrape the IMDb URL, process the page, and produce an output like this: \n"
"- IMDb URL: ...\n"
"- IMDb Rating: ...\n"
"- IMDb Page: <Markdown content of the IMDb page>"
)
name = "IMDb Scraper"
model = "gpt-4o-mini"
structured_output = IMDbMovie
tool_max_concurrency = 4

def get_instructions(self):
# Warning: this will use the server's timezone
# See: https://docs.djangoproject.com/en/5.0/topics/i18n/timezones/#default-time-zone-and-current-time-zone
# In a real application, you should use the user's timezone
current_date_str = timezone.now().date().isoformat()
return f"{self.instructions} Today is: {current_date_str}."
return f"{self.instructions}.\n Today is: {current_date_str}."

@method_tool
def scrape_imdb_url(self, url: str) -> str:
"""Scrape the IMDb URL and return the content as markdown."""
return requests.get("https://r.jina.ai/" + url, timeout=20).text[:10000]
"""Scrape the IMDb URL and return the content as Markdown."""
return requests.get(
"https://r.jina.ai/" + url,
headers={
"Authorization": "Bearer " + settings.JINA_API_KEY,
},
timeout=20,
).text[:30000]

def get_tools(self) -> Sequence[BaseTool]:
return [
BraveSearch.from_api_key(
RateLimitedBraveSearch.from_api_key(
api_key=settings.BRAVE_SEARCH_API_KEY, search_kwargs={"count": 5}
),
*super().get_tools(),
Expand All @@ -63,18 +86,23 @@ class MovieRecommendationAIAssistant(AIAssistant):
id = "movie_recommendation_assistant" # noqa: A003
instructions = (
"You're a helpful movie recommendation assistant. "
"Help the user find movies to watch and manage their movie backlogs. "
"Use the provided functions to answer questions and run operations.\n"
"Use the provided functions to answer queries and run operations.\n"
"Use the search function to find movie recommendations based on user's query.\n"
"Then, use the IMDb Scraper to get the IMDb URL and rating of the movies you're recommending. "
"Both the IMDb URL and rating are necessary to add a movie to the user's backlog. "
"Note the backlog is stored in a DB. "
"When managing the backlog, you must call the functions, to keep the sync with the DB. "
"When managing the backlog, you must call the functions, to keep your answers in sync with the DB. "
"The backlog has an order, and you should respect it. Call `reorder_backlog` when necessary.\n"
"Include the IMDb URL and rating of the movies when displaying the backlog. "
"You must use the IMDb Scraper to get the IMDb URL and rating of the movies. \n"
"Ask the user if they want to add your recommended movies to their backlog, "
"but only if the movie is not on the user's backlog yet."
"When showing the backlog, show the movies in the order they are stored in the DB, "
"and include the IMDb URL and rating.\n"
"Ask the user if they want to add your recommended movies to their backlog.\n"
"User may talk to you in any language. Respond with the same language, "
"but refer to movies and call functions with their English name.\n"
"Do not include images in your response."
)
name = "Movie Recommendation Assistant"
model = "gpt-4o-mini"
tool_max_concurrency = 4

def get_instructions(self):
# Warning: this will use the server's timezone
Expand All @@ -93,10 +121,14 @@ def get_instructions(self):

def get_tools(self) -> Sequence[BaseTool]:
return [
BraveSearch.from_api_key(
RateLimitedBraveSearch.from_api_key(
api_key=settings.BRAVE_SEARCH_API_KEY, search_kwargs={"count": 5}
),
IMDbScraper().as_tool(description="IMDb Scraper to get the IMDb data a given movie."),
IMDbScraper().as_tool(
description="IMDb Scraper to get the IMDb data a given movie. "
"Given a movie name (in English), "
"finds the movie URL, rating, and scrapes the IMDb page (as Markdown)."
),
*super().get_tools(),
]

Expand All @@ -116,7 +148,10 @@ def get_movies_backlog(self) -> str:

@method_tool
def add_movie_to_backlog(self, movie_name: str, imdb_url: str, imdb_rating: float) -> str:
"""Add a movie to user's backlog. Must pass the movie_name, imdb_url, and imdb_rating."""
"""
Add a movie to user's backlog. Must pass the movie_name, imdb_url, and imdb_rating.
Set imdb_rating to 0.0 if not available.
"""

with transaction.atomic():
MovieBacklogItem.objects.update_or_create(
Expand Down

0 comments on commit 854c952

Please sign in to comment.