From 21ed44296593f5789966bb668e2668a4302ffd01 Mon Sep 17 00:00:00 2001
From: adogecheems
Date: Wed, 28 Aug 2024 20:32:11 +0800
Subject: [PATCH] new version

---
 README.md                       | 17 ++++++++++-------
 anisearch/plugins/_webget_cf.py | 34 +++++++++++++++++++++++++++++++++
 anisearch/plugins/acgrip.py     |  3 ---
 anisearch/plugins/comicat.py    | 10 +++-------
 anisearch/plugins/dmhy.py       |  4 ++--
 anisearch/plugins/kisssub.py    | 10 +++-------
 anisearch/plugins/miobt.py      | 10 +++-------
 anisearch/plugins/nyaa.py       |  4 +---
 anisearch/plugins/tokyotosho.py | 13 ++++++++-----
 anisearch/search/AniSearch.py   |  3 +++
 anisearch/search/cli.py         |  4 ++--
 11 files changed, 69 insertions(+), 43 deletions(-)
 create mode 100644 anisearch/plugins/_webget_cf.py

diff --git a/README.md b/README.md
index 2e53093..65e0feb 100644
--- a/README.md
+++ b/README.md
@@ -103,12 +103,15 @@ AniSearch uses a metaclass-based plugin system to support different search sour
 
 ### Implemented plugins
 
-- `dmhy`: 动漫花园 search source (requires a proxy, fairly fast)
-- `comicat`: 漫猫 search source (very slow implementation, use with caution)
-- `kisssub`: 爱恋 search source (very slow implementation, use with caution, requires a proxy)
-- `miobt`: MioBT search source (very slow implementation, use with caution, requires a proxy)
-- `nyaa`: nyaa.si search source (requires a proxy, extremely fast, season-collection search unavailable)
-- `acgrip`: acg.rip search source (requires a proxy, moderate speed, season-collection search unavailable; due to the site itself, the magnet obtained is actually the torrent download link)
+Sadly, every search source below requires a proxy.
+
+- `dmhy`: 动漫花园 search source (fairly fast)
+- `comicat`: 漫猫 search source (very slow implementation, use with caution; searching only season collections is recommended)
+- `kisssub`: 爱恋 search source (same as above)
+- `miobt`: MioBT search source (same as above)
+- `nyaa`: nyaa.si search source (extremely fast, season-collection search unavailable)
+- `acgrip`: acg.rip search source (moderate speed, season-collection search unavailable; due to the site itself, the magnet obtained is actually the torrent download link)
+- `tokyotosho`: 东京图书馆 search source (moderate speed, season-collection search unavailable; most resources can only be found with English/Japanese keywords)
 
 ## Creating a custom plugin
 To create a custom plugin, inherit from the BasePlugin class and implement the search method. anisearch provides a practical HTTP request function, `anisearch.plugins._webget.get_html()`, which can be used directly. Here is a simple example:
@@ -159,7 +162,7 @@ anisearch -k <keyword> [options]
 
 - `-k`, `--keyword`: (required) the search keyword
 - `-p`, `--plugin`: (optional) the search plugin, defaults to `dmhy`
-- `-n`, `--not-collected`: (optional) disable the default season-collection search
+- `-c`, `--collected`: (optional) search only season collections
 
 ### Examples
 
diff --git a/anisearch/plugins/_webget_cf.py b/anisearch/plugins/_webget_cf.py
new file mode 100644
index 0000000..9a50dee
--- /dev/null
+++ b/anisearch/plugins/_webget_cf.py
@@ -0,0 +1,34 @@
+import os
+import requests
+import cloudscraper
+from ..search import log
+
+
+def get_html(url, proxies=None, system_proxy=False, verify=True):
+    if system_proxy:
+        http_proxy = os.environ.get('http_proxy')
+        https_proxy = os.environ.get('https_proxy')
+        if http_proxy or https_proxy:
+            proxies = {'http': http_proxy, 'https': https_proxy}
+        else:
+            log.warning("No system proxy found.")
+            raise requests.exceptions.ProxyError("No system proxy found.")
+
+    try:
+        if not verify:
+            requests.packages.urllib3.disable_warnings()
+
+        scraper = cloudscraper.create_scraper(delay=5, browser={
+            'browser': 'chrome',
+            'platform': 'linux',
+            'mobile': False,
+        })
+
+        response = scraper.get(url, proxies=proxies, verify=verify)
+
+        log.debug(f"A request has been made to url: {url}")
+        return response.content
+
+    except requests.RequestException as e:
+        log.exception(f"The search was aborted due to network reasons: {e}")
+        raise
diff --git a/anisearch/plugins/acgrip.py b/anisearch/plugins/acgrip.py
index bba970e..d812ad2 100644
--- a/anisearch/plugins/acgrip.py
+++ b/anisearch/plugins/acgrip.py
@@ -42,8 +42,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
             while tr:
                 tds = tr.find_all("td")
-                if len(tds) < 4:
-                    break
 
                 release_time = tds[0].find_all("div")[1].time.get("datetime")
                 release_time = time.strftime(self._timefmt, time.localtime(int(release_time)))
@@ -64,5 +62,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/comicat.py b/anisearch/plugins/comicat.py
index d909700..4a5673f 100644
--- a/anisearch/plugins/comicat.py
+++ b/anisearch/plugins/comicat.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/dmhy.py b/anisearch/plugins/dmhy.py
index 822d59f..52f899f 100644
--- a/anisearch/plugins/dmhy.py
+++ b/anisearch/plugins/dmhy.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -18,7 +19,7 @@ class Dmhy(BasePlugin):
     def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
         super().__init__(parser, verify, timefmt)
 
-    def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
+    def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
               system_proxy: bool = False, **extra_options) -> List[Anime]:
         animes: List[Anime] = []
         page = 1
@@ -56,5 +57,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/kisssub.py b/anisearch/plugins/kisssub.py
index de6bb47..02cc96c 100644
--- a/anisearch/plugins/kisssub.py
+++ b/anisearch/plugins/kisssub.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/miobt.py b/anisearch/plugins/miobt.py
index 7280a74..8a752b4 100644
--- a/anisearch/plugins/miobt.py
+++ b/anisearch/plugins/miobt.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/nyaa.py b/anisearch/plugins/nyaa.py
index d07d818..3893d53 100644
--- a/anisearch/plugins/nyaa.py
+++ b/anisearch/plugins/nyaa.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -40,8 +41,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
-                    if len(tds) < 5:
-                        continue
 
                     release_time = tds[4].string
                     release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y-%m-%d %H:%M'))
@@ -60,5 +59,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
diff --git a/anisearch/plugins/tokyotosho.py b/anisearch/plugins/tokyotosho.py
index 2d26e52..320fa1d 100644
--- a/anisearch/plugins/tokyotosho.py
+++ b/anisearch/plugins/tokyotosho.py
@@ -12,7 +12,6 @@
 
 BASE_URL = "https://www.tokyotosho.info/search.php?"
 
-
 def extract_info(text):
     size_match = re.search(r"Size:\s([\d.]+(?:MB|GB|KB))", text)
     size = size_match.group(1) if size_match else None
@@ -35,7 +34,7 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
         params = {'terms': keyword, 'type': 1, **extra_options}
 
         if collected:
-            log.warning("Nyaa search does not support collection.")
+            log.warning("Tokyotosho search does not support collection.")
 
         while True:
             params['page'] = page
@@ -45,17 +44,21 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 bs = BeautifulSoup(html, self._parser)
                 table = bs.find(class_='listing')
 
-                if table.find(class_='category_0') is None:
+                if not table or not table.find(class_='category_0'):
                     break
 
                 for row in list(zip(*[iter(table.find_all(class_='category_0'))]*2)):
                     top = row[0].find(class_='desc-top')
+                    if not top:
+                        continue
                     title = top.get_text(strip=True)
-                    magnet = top.a['href']
+                    magnet = top.a['href'] if top.a else None
 
                     bottom = row[1].find(class_='desc-bot')
+                    if not bottom:
+                        continue
                     size, release_time = extract_info(bottom.text)
-                    release_time = time.strftime(self._timefmt, release_time)
+                    release_time = time.strftime(self._timefmt, release_time) if release_time else None
 
                     log.debug(f"Successfully got: {title}")
 
diff --git a/anisearch/search/AniSearch.py b/anisearch/search/AniSearch.py
index 8045f25..27900e6 100644
--- a/anisearch/search/AniSearch.py
+++ b/anisearch/search/AniSearch.py
@@ -82,6 +82,9 @@ def search(self, keyword: str, collected: Optional[bool] = None, proxies: Option
 
         try:
             self.animes = self.plugin.search(**kwargs)
+
+            log.info(f"This search is complete: {keyword}")
+
         except Exception as e:
             log.error(f"Search failed: {str(e)}")
             raise
diff --git a/anisearch/search/cli.py b/anisearch/search/cli.py
index 7ecb7f3..94c6652 100644
--- a/anisearch/search/cli.py
+++ b/anisearch/search/cli.py
@@ -41,11 +41,11 @@ def main() -> None:
 
     parser.add_argument('-p', '--plugin', type=str, help='Plugin used for the search', default='dmhy')
     parser.add_argument('-k', '--keyword', type=str, help='Search keyword', required=True)
-    parser.add_argument('-n', '--not-collected', action='store_true', help='Disable the default season-collection search')
+    parser.add_argument('-c', '--collected', action='store_true', help='Search only season collections')
 
     args = parser.parse_args()
 
-    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': not args.not_collected}
+    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': args.collected}
 
     searcher = None
     try:
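
For reference, the plugin interface that the README section above describes could be sketched as follows. This is a minimal sketch only: the `BasePlugin` and `Anime` import paths, the plugin-registration flag, and the `Anime` constructor's field order are assumptions based on the patterns visible in this patch, not confirmed by it.

import time
from typing import List, Optional
from urllib.parse import urlencode

from bs4 import BeautifulSoup

from ._webget import get_html      # helper named in the README
from .base import BasePlugin       # hypothetical module path
from ..anime import Anime          # hypothetical module path
from ..search import log


class Example(BasePlugin):
    # Assumption: the metaclass may use a flag like this to register plugins.
    abstract = False

    def __init__(self, parser: str = 'lxml', verify: bool = False,
                 timefmt: str = r'%Y/%m/%d %H:%M') -> None:
        super().__init__(parser, verify, timefmt)

    def search(self, keyword: str, collected: bool = False,
               proxies: Optional[dict] = None, system_proxy: bool = False,
               **extra_options) -> List[Anime]:
        animes: List[Anime] = []
        # 'q' and the domain are placeholders for a real search source.
        url = "https://example.org/search?" + urlencode({'q': keyword, **extra_options})

        html = get_html(url, verify=self._verify, proxies=proxies,
                        system_proxy=system_proxy)
        bs = BeautifulSoup(html, self._parser)

        for tr in bs.find_all("tr"):
            tds = tr.find_all("td")
            # Normalize the site's timestamp into the configured format.
            release_time = time.strftime(
                self._timefmt,
                time.strptime(tds[0].get_text(strip=True), '%Y-%m-%d %H:%M'))
            title = tds[1].get_text(strip=True)
            # Field order mirrors the other plugins; verify against the real Anime class.
            animes.append(Anime(release_time, title,
                                tds[2].get_text(strip=True),   # size
                                tds[3].a["href"]))             # magnet
            log.debug(f"Successfully got: {title}")

        return animes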
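The new _webget_cf.py mirrors the signature of `_webget.get_html()` but routes the request through cloudscraper, so a plugin targeting a Cloudflare-fronted site should be able to switch by changing a single import. A direct-usage sketch, with a placeholder URL and proxy address:

from anisearch.plugins._webget_cf import get_html  # Cloudflare-aware drop-in

# Placeholder local proxy; substitute your own.
proxies = {'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}

html = get_html("https://example.org/", proxies=proxies, verify=True)
print(len(html))  # raw page bytes, i.e. scraper.get(...).content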
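With the flag flipped from -n/--not-collected to -c/--collected, season-collection search is now opt-in rather than on by default, so typical invocations look like:

anisearch -k "keyword"              # per-episode search with the default dmhy plugin
anisearch -k "keyword" -p comicat -c  # season collections only, as the comicat entry recommends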