From 21ed44296593f5789966bb668e2668a4302ffd01 Mon Sep 17 00:00:00 2001
From: adogecheems
Date: Wed, 28 Aug 2024 20:32:11 +0800
Subject: [PATCH] new version

---
 README.md                       | 17 ++++++++++-------
 anisearch/plugins/_webget_cf.py | 34 +++++++++++++++++++++++++++++++++
 anisearch/plugins/acgrip.py     |  3 ---
 anisearch/plugins/comicat.py    | 10 +++-------
 anisearch/plugins/dmhy.py       |  4 ++--
 anisearch/plugins/kisssub.py    | 10 +++-------
 anisearch/plugins/miobt.py      | 10 +++-------
 anisearch/plugins/nyaa.py       |  4 +---
 anisearch/plugins/tokyotosho.py | 13 ++++++++-----
 anisearch/search/AniSearch.py   |  3 +++
 anisearch/search/cli.py         |  4 ++--
 11 files changed, 69 insertions(+), 43 deletions(-)
 create mode 100644 anisearch/plugins/_webget_cf.py

diff --git a/README.md b/README.md
index 2e53093..65e0feb 100644
--- a/README.md
+++ b/README.md
@@ -103,12 +103,15 @@ AniSearch uses a metaclass-based plugin system to support different search sour
 
 ### Implemented plugins
 
-- `dmhy`: 动漫花园 search source (requires a proxy, fairly fast)
-- `comicat`: 漫猫 search source (very slow implementation, use with caution)
-- `kisssub`: 爱恋 search source (very slow implementation, use with caution, requires a proxy)
-- `miobt`: MioBT search source (very slow implementation, use with caution, requires a proxy)
-- `nyaa`: nyaa.si search source (requires a proxy, extremely fast, season-collection search unavailable)
-- `acgrip`: acg.rip search source (requires a proxy, moderate speed, season-collection search unavailable; due to the site itself, the magnet obtained is actually the torrent download link)
+Sadly, every search source below requires a proxy.
+
+- `dmhy`: 动漫花园 search source (fairly fast)
+- `comicat`: 漫猫 search source (very slow implementation, use with caution; searching only season collections is recommended)
+- `kisssub`: 爱恋 search source (same as above)
+- `miobt`: MioBT search source (same as above)
+- `nyaa`: nyaa.si search source (extremely fast, season-collection search unavailable)
+- `acgrip`: acg.rip search source (moderate speed, season-collection search unavailable; due to the site itself, the magnet obtained is actually the torrent download link)
+- `tokyotosho`: 东京图书馆 search source (moderate speed, season-collection search unavailable; most resources can only be found with English/Japanese keywords)
 
 ## Creating a custom plugin
 To create a custom plugin, inherit from the BasePlugin class and implement the search method. anisearch provides a practical HTTP request function, `anisearch.plugins._webget.get_html()`, which can be used directly. Here is a simple example:
@@ -159,7 +162,7 @@ anisearch -k <keyword> [options]
 
 - `-k`, `--keyword`: (required) the search keyword
 - `-p`, `--plugin`: (optional) the search plugin, defaults to `dmhy`
-- `-n`, `--not-collected`: (optional) disable the default season-collection search
+- `-c`, `--collected`: (optional) search only season collections
 
 ### Examples
 
diff --git a/anisearch/plugins/_webget_cf.py b/anisearch/plugins/_webget_cf.py
new file mode 100644
index 0000000..9a50dee
--- /dev/null
+++ b/anisearch/plugins/_webget_cf.py
@@ -0,0 +1,34 @@
+import os
+import requests
+import cloudscraper
+from ..search import log
+
+
+def get_html(url, proxies=None, system_proxy=False, verify=True):
+    if system_proxy:
+        http_proxy = os.environ.get('http_proxy')
+        https_proxy = os.environ.get('https_proxy')
+        if http_proxy or https_proxy:
+            proxies = {'http': http_proxy, 'https': https_proxy}
+        else:
+            log.warning("No system proxy found.")
+            raise requests.exceptions.ProxyError("No system proxy found.")
+
+    try:
+        if not verify:
+            requests.packages.urllib3.disable_warnings()
+
+        scraper = cloudscraper.create_scraper(delay=5, browser={
+            'browser': 'chrome',
+            'platform': 'linux',
+            'mobile': False,
+        })
+
+        response = scraper.get(url, proxies=proxies, verify=verify)
+
+        log.debug(f"A request has been made to url: {url}")
+        return response.content
+
+    except requests.RequestException as e:
+        log.exception(f"The search was aborted due to network reasons: {e}")
+        raise
diff --git a/anisearch/plugins/acgrip.py b/anisearch/plugins/acgrip.py
index bba970e..d812ad2 100644
--- a/anisearch/plugins/acgrip.py
+++ b/anisearch/plugins/acgrip.py
@@ -42,8 +42,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
             while tr:
                 tds = tr.find_all("td")
-                if len(tds) < 4:
-                    break
 
                 release_time = tds[0].find_all("div")[1].time.get("datetime")
                 release_time = time.strftime(self._timefmt, time.localtime(int(release_time)))
@@ -64,5 +62,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/comicat.py b/anisearch/plugins/comicat.py
index d909700..4a5673f 100644
--- a/anisearch/plugins/comicat.py
+++ b/anisearch/plugins/comicat.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/dmhy.py b/anisearch/plugins/dmhy.py
index 822d59f..52f899f 100644
--- a/anisearch/plugins/dmhy.py
+++ b/anisearch/plugins/dmhy.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -18,7 +19,7 @@ class Dmhy(BasePlugin):
     def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
         super().__init__(parser, verify, timefmt)
 
-    def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
+    def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
               system_proxy: bool = False, **extra_options) -> List[Anime]:
         animes: List[Anime] = []
         page = 1
@@ -56,5 +57,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/kisssub.py b/anisearch/plugins/kisssub.py
index de6bb47..02cc96c 100644
--- a/anisearch/plugins/kisssub.py
+++ b/anisearch/plugins/kisssub.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/miobt.py b/anisearch/plugins/miobt.py
index 7280a74..8a752b4 100644
--- a/anisearch/plugins/miobt.py
+++ b/anisearch/plugins/miobt.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/nyaa.py b/anisearch/plugins/nyaa.py
index d07d818..3893d53 100644
--- a/anisearch/plugins/nyaa.py
+++ b/anisearch/plugins/nyaa.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -40,8 +41,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
-                    if len(tds) < 5:
-                        continue
 
                     release_time = tds[4].string
                     release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y-%m-%d %H:%M'))
@@ -60,5 +59,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/tokyotosho.py b/anisearch/plugins/tokyotosho.py
index 2d26e52..320fa1d 100644
--- a/anisearch/plugins/tokyotosho.py
+++ b/anisearch/plugins/tokyotosho.py
@@ -12,7 +12,6 @@
 
 BASE_URL = "https://www.tokyotosho.info/search.php?"
 
-
 def extract_info(text):
     size_match = re.search(r"Size:\s([\d.]+(?:MB|GB|KB))", text)
     size = size_match.group(1) if size_match else None
@@ -35,7 +34,7 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
         params = {'terms': keyword, 'type': 1, **extra_options}
 
         if collected:
-            log.warning("Nyaa search does not support collection.")
+            log.warning("Tokyotosho search does not support collection.")
 
         while True:
             params['page'] = page
@@ -45,17 +44,21 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 bs = BeautifulSoup(html, self._parser)
                 table = bs.find(class_='listing')
 
-                if table.find(class_='category_0') is None:
+                if not table or not table.find(class_='category_0'):
                     break
 
                 for row in list(zip(*[iter(table.find_all(class_='category_0'))]*2)):
                     top = row[0].find(class_='desc-top')
+                    if not top:
+                        continue
                     title = top.get_text(strip=True)
-                    magnet = top.a['href']
+                    magnet = top.a['href'] if top.a else None
 
                     bottom = row[1].find(class_='desc-bot')
+                    if not bottom:
+                        continue
                     size, release_time = extract_info(bottom.text)
-                    release_time = time.strftime(self._timefmt, release_time)
+                    release_time = time.strftime(self._timefmt, release_time) if release_time else None
 
                     log.debug(f"Successfully got: {title}")
 
diff --git a/anisearch/search/AniSearch.py b/anisearch/search/AniSearch.py
index 8045f25..27900e6 100644
--- a/anisearch/search/AniSearch.py
+++ b/anisearch/search/AniSearch.py
@@ -82,6 +82,9 @@ def search(self, keyword: str, collected: Optional[bool] = None, proxies: Option
 
         try:
             self.animes = self.plugin.search(**kwargs)
+
+            log.info(f"This search is complete: {keyword}")
+
         except Exception as e:
             log.error(f"Search failed: {str(e)}")
             raise
diff --git a/anisearch/search/cli.py b/anisearch/search/cli.py
index 7ecb7f3..94c6652 100644
--- a/anisearch/search/cli.py
+++ b/anisearch/search/cli.py
@@ -41,11 +41,11 @@ def main() -> None:
 
     parser.add_argument('-p', '--plugin', type=str, help='Plugin used for the search', default='dmhy')
     parser.add_argument('-k', '--keyword', type=str, help='Search keyword', required=True)
-    parser.add_argument('-n', '--not-collected', action='store_true', help='Disable the default season-collection search')
+    parser.add_argument('-c', '--collected', action='store_true', help='Search only season collections')
 
     args = parser.parse_args()
 
-    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': not args.not_collected}
+    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': args.collected}
 
     searcher = None
     try:
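
For reference, the plugin interface that the README section above describes could be sketched as follows. This is a minimal sketch only: the `BasePlugin` and `Anime` import paths, the plugin-registration flag, and the `Anime` constructor's field order are assumptions based on the patterns visible in this patch, not confirmed by it.

import time
from typing import List, Optional
from urllib.parse import urlencode

from bs4 import BeautifulSoup

from ._webget import get_html      # helper named in the README
from .base import BasePlugin       # hypothetical module path
from ..anime import Anime          # hypothetical module path
from ..search import log


class Example(BasePlugin):
    # Assumption: the metaclass may use a flag like this to register plugins.
    abstract = False

    def __init__(self, parser: str = 'lxml', verify: bool = False,
                 timefmt: str = r'%Y/%m/%d %H:%M') -> None:
        super().__init__(parser, verify, timefmt)

    def search(self, keyword: str, collected: bool = False,
               proxies: Optional[dict] = None, system_proxy: bool = False,
               **extra_options) -> List[Anime]:
        animes: List[Anime] = []
        # 'q' and the domain are placeholders for a real search source.
        url = "https://example.org/search?" + urlencode({'q': keyword, **extra_options})

        html = get_html(url, verify=self._verify, proxies=proxies,
                        system_proxy=system_proxy)
        bs = BeautifulSoup(html, self._parser)

        for tr in bs.find_all("tr"):
            tds = tr.find_all("td")
            # Normalize the site's timestamp into the configured format.
            release_time = time.strftime(
                self._timefmt,
                time.strptime(tds[0].get_text(strip=True), '%Y-%m-%d %H:%M'))
            title = tds[1].get_text(strip=True)
            # Field order mirrors the other plugins; verify against the real Anime class.
            animes.append(Anime(release_time, title,
                                tds[2].get_text(strip=True),   # size
                                tds[3].a["href"]))             # magnet
            log.debug(f"Successfully got: {title}")

        return animes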
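The new _webget_cf.py mirrors the signature of `_webget.get_html()` but routes the request through cloudscraper, so a plugin targeting a Cloudflare-fronted site should be able to switch by changing a single import. A direct-usage sketch, with a placeholder URL and proxy address:

from anisearch.plugins._webget_cf import get_html  # Cloudflare-aware drop-in

# Placeholder local proxy; substitute your own.
proxies = {'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}

html = get_html("https://example.org/", proxies=proxies, verify=True)
print(len(html))  # raw page bytes, i.e. scraper.get(...).content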
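With the flag flipped from -n/--not-collected to -c/--collected, season-collection search is now opt-in rather than on by default, so typical invocations look like:

anisearch -k "keyword"              # per-episode search with the default dmhy plugin
anisearch -k "keyword" -p comicat -c  # season collections only, as the comicat entry recommends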