Commit

new version

adogecheems committed Aug 28, 2024
1 parent c8c54ad commit 21ed442
Showing 11 changed files with 69 additions and 43 deletions.
17 changes: 10 additions & 7 deletions README.md
@@ -103,12 +103,15 @@ AniSearch uses a metaclass-based plugin system to support different search source plugins
 
 ### Implemented plugins
 
-- `dmhy`: 动漫花园 search source (requires a proxy, fairly fast)
-- `comicat`: 漫猫 search source (very slow implementation, use with caution)
-- `kisssub`: 爱恋 search source (very slow implementation, use with caution, requires a proxy)
-- `miobt`: MioBT search source (very slow implementation, use with caution, requires a proxy)
-- `nyaa`: nyaa.si search source (requires a proxy, extremely fast, cannot search season batches)
-- `acgrip`: acg.rip search source (requires a proxy, moderate speed, cannot search season batches; because of the site itself, the magnet it returns is actually the torrent download link)
+Sadly, every search source below requires a proxy
+
+- `dmhy`: 动漫花园 search source (fairly fast)
+- `comicat`: 漫猫 search source (very slow implementation, use with caution; searching season batches only is recommended)
+- `kisssub`: 爱恋 search source (same as above)
+- `miobt`: MioBT search source (same as above)
+- `nyaa`: nyaa.si search source (extremely fast, cannot search season batches)
+- `acgrip`: acg.rip search source (moderate speed, cannot search season batches; because of the site itself, the magnet it returns is actually the torrent download link)
+- `tokyotosho`: Tokyo Toshokan search source (moderate speed, cannot search season batches; most resources can only be found with English/Japanese keywords)
 
 ## Creating a custom plugin
 To create a custom plugin, subclass the BasePlugin class and implement the search method. anisearch provides a handy HTTP request helper, `anisearch.plugins._webget.get_html()`, which can be used directly. A simple example follows:
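(The example itself is collapsed in this diff view. Below is a minimal sketch of what such a plugin could look like, based on the plugin code shown later in this commit; the class name, URL, and the import paths for BasePlugin and Anime are assumptions, not taken from the repository.)

from typing import List, Optional
from anisearch.plugins import BasePlugin, Anime  # import paths assumed, not shown in this commit
from anisearch.plugins._webget import get_html

class Example(BasePlugin):
    def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
               system_proxy: bool = False, **extra_options) -> List[Anime]:
        animes: List[Anime] = []
        # fetch a search page with the bundled helper (URL is illustrative)
        html = get_html(f"https://example.com/search?q={keyword}",
                        proxies=proxies, system_proxy=system_proxy, verify=self._verify)
        # ... parse html with BeautifulSoup and append Anime objects to animes ...
        return animes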
@@ -159,7 +162,7 @@ anisearch -k <keyword> [options]
 
 - `-k`, `--keyword`: (required) search keyword
 - `-p`, `--plugin`: (optional) search plugin, defaults to `dmhy`
-- `-n`, `--not-collected`: (optional) disable the season-batch search that is on by default
+- `-c`, `--collected`: (optional) search season batches only
 
 ### Examples
 
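(The concrete examples are collapsed in this view; with the new flag, typical invocations look like the following, where the keyword is illustrative:)

anisearch -k "Frieren"            # regular search with the default dmhy plugin
anisearch -k "Frieren" -p nyaa    # use the nyaa.si source instead
anisearch -k "Frieren" -c         # search season batches only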
34 changes: 34 additions & 0 deletions anisearch/plugins/_webget_cf.py
@@ -0,0 +1,34 @@
+import os
+import requests
+import cloudscraper
+from ..search import log
+
+
+def get_html(url, proxies=None, system_proxy=False, verify=True):
+    if system_proxy:
+        http_proxy = os.environ.get('http_proxy')
+        https_proxy = os.environ.get('https_proxy')
+        if http_proxy or https_proxy:
+            proxies = {'http': http_proxy, 'https': https_proxy}
+        else:
+            log.warning("No system proxy found.")
+            raise requests.exceptions.ProxyError("No system proxy found.")
+
+    try:
+        if not verify:
+            requests.packages.urllib3.disable_warnings()
+
+        scraper = cloudscraper.create_scraper(delay=5, browser={
+            'browser': 'chrome',
+            'platform': 'linux',
+            'mobile': False,
+        })
+
+        response = scraper.get(url, proxies=proxies, verify=verify)
+
+        log.debug(f"A request has been made to url: {url}")
+        return response.content
+
+    except requests.RequestException as e:
+        log.exception(f"The search was aborted due to network reasons: {e}")
+        raise
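(A hedged usage sketch of this new helper — the URL is illustrative; the cloudscraper-based variant presumably exists to cope with Cloudflare-fronted sources:)

from anisearch.plugins._webget_cf import get_html

# route the request through the system http_proxy/https_proxy variables
# and skip TLS verification (verify=False also silences urllib3's warnings above)
html = get_html("https://example.com/", system_proxy=True, verify=False)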
3 changes: 0 additions & 3 deletions anisearch/plugins/acgrip.py
@@ -42,8 +42,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
                 while tr:
                     tds = tr.find_all("td")
-                    if len(tds) < 4:
-                        break
 
                     release_time = tds[0].find_all("div")[1].time.get("datetime")
                     release_time = time.strftime(self._timefmt, time.localtime(int(release_time)))
@@ -64,5 +62,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
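(Note on this removal, which recurs as a `len(tds) < 5` guard in nyaa.py below: a malformed row now raises IndexError from the `tds[...]` lookups instead of being skipped, and that exception is caught by the page-level `except Exception`, which logs it and stops pagination. A minimal model of the new control flow:)

import logging
log = logging.getLogger("anisearch")  # stand-in for the project logger, illustration only

tds = []  # a row that produced no <td> cells
try:
    release_time = tds[0]  # IndexError: no length guard skips it anymore
except Exception as e:
    log.error(f"Error occurred while processing page 1: {e}")  # the page loop then breaks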
10 changes: 3 additions & 7 deletions anisearch/plugins/comicat.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
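(The same try/except removal appears in kisssub.py and miobt.py below. Consequence: a date string that does not match '%Y/%m/%d' now raises ValueError out of the row loop, and the page-level `except Exception` logs it and stops pagination instead of skipping just the offending row. For example:)

import time
time.strptime("2024/08/28", '%Y/%m/%d')   # parses fine
time.strptime("2024-08-28", '%Y/%m/%d')   # ValueError -> now aborts the whole page loop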
4 changes: 2 additions & 2 deletions anisearch/plugins/dmhy.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -18,7 +19,7 @@ class Dmhy(BasePlugin):
     def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
         super().__init__(parser, verify, timefmt)
 
-    def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
+    def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
                system_proxy: bool = False, **extra_options) -> List[Anime]:
         animes: List[Anime] = []
         page = 1
@@ -56,5 +57,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
10 changes: 3 additions & 7 deletions anisearch/plugins/kisssub.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
10 changes: 3 additions & 7 deletions anisearch/plugins/miobt.py
@@ -1,3 +1,4 @@
+# Stable
 import re
 import time
 from typing import Optional, List
@@ -47,7 +48,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
             try:
                 html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
                 bs = BeautifulSoup(html, self._parser)
-                tbody = bs.find("tbody", id="data_list")
+                tbody = bs.find("tbody", class_="tbody", id="data_list")
 
                 if not tbody:
                     break
@@ -61,11 +62,7 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
                     release_time = tds[0].get_text(strip=True)
-                    try:
-                        release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
-                    except ValueError:
-                        log.error(f"Invalid time format: {release_time}")
-                        continue
+                    release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
 
                     title = tds[2].a.get_text(strip=True)
                     link = DOMAIN + tds[2].a["href"]
@@ -89,5 +86,4 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
4 changes: 1 addition & 3 deletions anisearch/plugins/nyaa.py
@@ -1,3 +1,4 @@
+# Stable
 import time
 from typing import Optional, List
 from urllib.parse import urlencode
@@ -40,8 +41,6 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
 
                 for tr in tbody.find_all("tr"):
                     tds = tr.find_all("td")
-                    if len(tds) < 5:
-                        continue
 
                     release_time = tds[4].string
                     release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y-%m-%d %H:%M'))
@@ -60,5 +59,4 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 log.error(f"Error occurred while processing page {page}: {e}")
                 break
 
-        log.info(f"This search is complete: {keyword}")
         return animes
13 changes: 8 additions & 5 deletions anisearch/plugins/tokyotosho.py
@@ -12,7 +12,6 @@
 
 BASE_URL = "https://www.tokyotosho.info/search.php?"
 
-
 def extract_info(text):
     size_match = re.search(r"Size:\s([\d.]+(?:MB|GB|KB))", text)
     size = size_match.group(1) if size_match else None
@@ -35,7 +34,7 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
         params = {'terms': keyword, 'type': 1, **extra_options}
 
         if collected:
-            log.warning("Nyaa search does not support collection.")
+            log.warning("Tokyotosho search does not support collection.")
 
         while True:
             params['page'] = page
@@ -45,17 +44,21 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
                 bs = BeautifulSoup(html, self._parser)
                 table = bs.find(class_='listing')
 
-                if table.find(class_='category_0') is None:
+                if not table or not table.find(class_='category_0'):
                     break
 
                 for row in list(zip(*[iter(table.find_all(class_='category_0'))]*2)):
                     top = row[0].find(class_='desc-top')
+                    if not top:
+                        continue
                     title = top.get_text(strip=True)
-                    magnet = top.a['href']
+                    magnet = top.a['href'] if top.a else None
 
                     bottom = row[1].find(class_='desc-bot')
+                    if not bottom:
+                        continue
                     size, release_time = extract_info(bottom.text)
-                    release_time = time.strftime(self._timefmt, release_time)
+                    release_time = time.strftime(self._timefmt, release_time) if release_time else None
 
                     log.debug(f"Successfully got: {title}")
 
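(An aside on the pairing idiom in this loop: each tokyotosho entry spans two consecutive rows, as the desc-top/desc-bot lookups suggest, and `zip(*[iter(seq)]*2)` groups a flat sequence into consecutive pairs because both zip arguments share a single iterator. A quick illustration:)

rows = ["top1", "bot1", "top2", "bot2"]
print(list(zip(*[iter(rows)] * 2)))   # [('top1', 'bot1'), ('top2', 'bot2')]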
3 changes: 3 additions & 0 deletions anisearch/search/AniSearch.py
@@ -82,6 +82,9 @@ def search(self, keyword: str, collected: Optional[bool] = None, proxies: Option
 
         try:
             self.animes = self.plugin.search(**kwargs)
+
+            log.info(f"This search is complete: {keyword}")
+
         except Exception as e:
             log.error(f"Search failed: {str(e)}")
             raise
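(This is where the per-plugin `log.info(f"This search is complete: ...")` lines removed above were consolidated: completion is now logged once by the orchestrator. A hedged usage sketch — the no-argument constructor and its dmhy default are assumptions based on the README, not confirmed by this diff:)

from anisearch.search.AniSearch import AniSearch  # import path inferred from the file layout

searcher = AniSearch()              # assumed to default to the dmhy plugin
searcher.search(keyword="Frieren")  # logs "This search is complete: Frieren" on success
print(searcher.animes)              # results are kept on the instance, per the assignment above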
4 changes: 2 additions & 2 deletions anisearch/search/cli.py
@@ -41,11 +41,11 @@ def main() -> None:
 
     parser.add_argument('-p', '--plugin', type=str, help='搜索使用的插件', default='dmhy')
     parser.add_argument('-k', '--keyword', type=str, help='搜索关键词', required=True)
-    parser.add_argument('-n', '--not-collected', action='store_true', help='是否不启用默认的季度全集搜索')
+    parser.add_argument('-c', '--collected', action='store_true', help='是否启用季度全集搜索')
 
     args = parser.parse_args()
 
-    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': not args.not_collected}
+    search_params: Dict[str, Any] = {'keyword': args.keyword, 'collected': args.collected}
 
     searcher = None
     try:
