From ea1b7bf72cdb2b72fad4c728939f8bc1ff777cb7 Mon Sep 17 00:00:00 2001 From: Onur ULUSOY Date: Thu, 6 Jun 2024 05:29:21 +0300 Subject: [PATCH] Added try excepts --- tiger/tools/search/duckduckgo.py | 9 ++-- tiger/tools/search/google.py | 8 +++- tiger/tools/search/read_website.py | 72 ++++++++++++++++-------------- 3 files changed, 49 insertions(+), 40 deletions(-) diff --git a/tiger/tools/search/duckduckgo.py b/tiger/tools/search/duckduckgo.py index 47cbc50..66a417b 100644 --- a/tiger/tools/search/duckduckgo.py +++ b/tiger/tools/search/duckduckgo.py @@ -1,9 +1,10 @@ def search(query:str, max_number:int=20) -> list: - from duckduckgo_search import DDGS - - - return [result["href"] for result in DDGS().text(query, max_results=max_number)] + try: + from duckduckgo_search import DDGS + return [result["href"] for result in DDGS().text(query, max_results=max_number)] + except: + return "An exception occurred" tool_name = "search.duckduckgo" diff --git a/tiger/tools/search/google.py b/tiger/tools/search/google.py index dfc6c28..c25e857 100644 --- a/tiger/tools/search/google.py +++ b/tiger/tools/search/google.py @@ -1,8 +1,12 @@ def google(query:str, max_number:int=20) -> list: - from googlesearch import search as gsearch - return list(gsearch(query, stop=max_number)) + try: + from googlesearch import search as gsearch + return list(gsearch(query, stop=max_number)) + except: + return "An exception occurred" + tool_name = "search.google" diff --git a/tiger/tools/search/read_website.py b/tiger/tools/search/read_website.py index 39e9b6f..111e617 100644 --- a/tiger/tools/search/read_website.py +++ b/tiger/tools/search/read_website.py @@ -1,40 +1,44 @@ def read_website(url: str, max_content_lenght: int = 5000) -> dict: - import requests - - from bs4 import BeautifulSoup - import re - - html = requests.get(url).text - soup = BeautifulSoup(html) - meta_properties = [ - "og:description", - "og:site_name", - "og:title", - "og:type", - "og:url", - ] - meta = {} - for property_name in meta_properties: - try: - tag = soup.find("meta", property=property_name) - if tag: - meta[property_name] = str(tag.get("content", None)) - except AttributeError: - meta[property_name] = None - for ignore_tag in soup(["script", "style"]): - ignore_tag.decompose() - title = soup.title.string if soup.title else "" - content = soup.body.get_text() if soup.body else "" - links = [] - for a in soup.find_all("a", href=True): - links.append({"title": a.text.strip(), "link": a["href"]}) - content = re.sub(r"[\n\r\t]+", "\n", content) - content = re.sub(r" +", " ", content) - content = re.sub(r"[\n ]{3,}", "\n\n", content) - content = content.strip() - return {"meta": meta, "title": title, "content": content[:max_content_lenght], "sub_links": links} + try: + import requests + + from bs4 import BeautifulSoup + import re + + html = requests.get(url).text + soup = BeautifulSoup(html) + meta_properties = [ + "og:description", + "og:site_name", + "og:title", + "og:type", + "og:url", + ] + meta = {} + for property_name in meta_properties: + try: + tag = soup.find("meta", property=property_name) + if tag: + meta[property_name] = str(tag.get("content", None)) + except AttributeError: + meta[property_name] = None + for ignore_tag in soup(["script", "style"]): + ignore_tag.decompose() + title = soup.title.string if soup.title else "" + content = soup.body.get_text() if soup.body else "" + links = [] + for a in soup.find_all("a", href=True): + links.append({"title": a.text.strip(), "link": a["href"]}) + content = re.sub(r"[\n\r\t]+", "\n", content) + content = re.sub(r" +", " ", content) + content = re.sub(r"[\n ]{3,}", "\n\n", content) + content = content.strip() + return {"meta": meta, "title": title, "content": content[:max_content_lenght], "sub_links": links} + + except: + return "An exception occurred"