
Commit

adogecheems committed Aug 28, 2024
1 parent 653d219 commit c8c54ad
Showing 10 changed files with 130 additions and 62 deletions.
13 changes: 8 additions & 5 deletions README.md
@@ -77,11 +77,15 @@ print(searcher.anime.size)

`AniSearch` is the main search class. It provides the following methods:

- `search(keyword, collected=None, proxies=None, system_proxy=None)`: search for anime
- `search(keyword, collected=None, proxies=None, system_proxy=None, **extra_options)`: search for anime
- `select(index)`: select one anime from the search results
- `size_format(unit='MB')`: convert the selected anime's file size
- `save_csv(filename)`: save the search results (all of them) to a CSV file

**Note:** the `extra_options` keyword arguments are merged into the query string used when scraping, so they can specify additional categories or options. To find out which parameters a source accepts, inspect the URL the search source generates when searching (see below).

![Query string](https://cdn.mmoe.work/img/url.png)
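A minimal usage sketch (hedged: the import path is assumed from the README's earlier examples, and `sort_id` is just one example option; `sort_id="31"` is the value the dmhy plugin itself sets for collections in this commit):

```python
from anisearch import AniSearch  # assumed import path

searcher = AniSearch()
# Extra keyword arguments are merged into the scrape-time query string.
# sort_id is a dmhy query parameter; other sources accept different options.
searcher.search("fate", sort_id="31")
```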

### The `Anime` class

The `Anime` class represents one anime entry and has the following attributes:
@@ -118,11 +122,10 @@ from anisearch.plugins._webget import get_html
class Custom(BasePlugin):
abstract = False

def __init__(self, *args, **kwargs):
# any initialization code you need goes here
pass
def __init__(self, parser, verify, timefmt) -> None:
super().__init__(parser, verify, timefmt)

def search(self, keyword, if_collected=True, proxies=None, system_proxy=False):
def search(self, keyword, if_collected=True, proxies=None, system_proxy=False, **extra_options):
html = get_html("<url>", proxies=None, system_proxy=False, verify=True)

# implement your search logic here
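# A hedged sketch of a typical body (names are assumptions; the bundled
# plugins parse rows with BeautifulSoup and collect Anime objects, so
# check the Anime class for its actual constructor):
#
#   bs = BeautifulSoup(html, self._parser)
#   animes = []
#   for row in bs.find_all("tr"):
#       ...  # extract time, title, size and magnet from each row
#       animes.append(Anime(...))
#   return animes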
8 changes: 7 additions & 1 deletion anisearch/plugins/__init__.py
@@ -15,8 +15,13 @@ def __init__(cls, name, bases, attrs):
class BasePlugin(metaclass=PluginMeta):
abstract = True

def __init__(self, parser, verify, timefmt):
self._parser = parser
self._verify = verify
self._timefmt = timefmt

@abstractmethod
def search(self, keyword, proxies, system_proxy):
def search(self, keyword, proxies, system_proxy, extra_options):
"""
Abstract method to search for a keyword.
@@ -27,6 +32,7 @@ def search(self, keyword, proxies, system_proxy):
"""
pass


def get_plugin(name: str):
"""
Get a plugin by its name.
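As orientation for this interface, a hedged sketch of driving a plugin directly (assuming `get_plugin` returns the plugin class registered by `PluginMeta` rather than an instance; the constructor and `search` signatures are taken from the diffs below):

```python
from anisearch.plugins import get_plugin

Dmhy = get_plugin("dmhy")  # assumption: returns the registered class
plugin = Dmhy(parser="lxml", verify=False, timefmt=r"%Y/%m/%d %H:%M")

# Extra options (here dmhy's sort_id) are merged into the query string.
animes = plugin.search("fate", collected=False, sort_id="31")
```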
12 changes: 6 additions & 6 deletions anisearch/plugins/acgrip.py
@@ -1,3 +1,4 @@
# Stable
import time
from typing import Optional, List
from urllib.parse import urlencode
@@ -18,15 +19,14 @@ class Acgrip(BasePlugin):

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
log.warning("Using acg.rip search can only return torrent download addresses, not magnet links")
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
page = 1
params = {'term': keyword}

params = {'term': keyword, **extra_options}
if collected:
log.warning("Acg.rip search does not support collection.")

@@ -49,7 +49,7 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
release_time = time.strftime(self._timefmt, time.localtime(int(release_time)))

title = tds[1].find_all("a")[-1].get_text(strip=True)
magnet = DOMAIN + tds[2].a.get("href")
magnet = DOMAIN + tds[2].a["href"]
size = tds[3].string

log.debug(f"Successfully got: {title}")
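The `params = {'term': keyword, **extra_options}` line above is where `**extra_options` enters the query string. A standalone sketch of the mechanics, standard library only (the parameter names are illustrative):

```python
from urllib.parse import urlencode

def build_query(keyword: str, **extra_options) -> str:
    # Later keys win on collision, so callers can extend or override defaults.
    params = {"term": keyword, **extra_options}
    return urlencode(params)

print(build_query("fate", page="2"))  # term=fate&page=2
```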
18 changes: 8 additions & 10 deletions anisearch/plugins/comicat.py
@@ -28,17 +28,15 @@ class Comicat(BasePlugin):
abstract = False

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
prev_anime_title = ""
page = 1

params = {'keyword': keyword}
params = {'keyword': keyword, **extra_options}
if collected:
params['complete'] = 1

@@ -54,14 +52,14 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
if not tbody:
break

current_titles = [tr.find_all('td')[2].a.get_text(strip=True) for tr in tbody.find_all('tr')]
current_titles = [tr.find_all("td")[2].a.get_text(strip=True) for tr in tbody.find_all("tr")]
if not current_titles or current_titles[0] == prev_anime_title:
break

prev_anime_title = current_titles[0]

for tr in tbody.find_all('tr'):
tds = tr.find_all('td')
for tr in tbody.find_all("tr"):
tds = tr.find_all("td")
release_time = tds[0].get_text(strip=True)
try:
release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
@@ -70,13 +68,13 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
continue

title = tds[2].a.get_text(strip=True)
link = DOMAIN + tds[2].a['href']
link = DOMAIN + tds[2].a["href"]
size = tds[3].string

try:
link_html = get_html(link, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
link_bs = BeautifulSoup(link_html, self._parser)
script = link_bs.find(id='btm').find(class_='main').find('script').find_next_siblings('script')[-1].string
script = link_bs.find(id="btm").find(class_="main").find("script").find_next_siblings("script")[-1].string
magnet = get_magnet(script)
except (ValueError, AttributeError, Exception) as e:
log.error(f"Failed to get magnet link for {title}: {e}")
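Comicat (and the near-identical Kisssub and Miobt plugins below) pulls the magnet out of the last `<script>` tag under `#btm .main` before handing its text to the project's `get_magnet` helper. A self-contained illustration of that BeautifulSoup navigation on hypothetical HTML (`get_magnet` itself is not reproduced here):

```python
from bs4 import BeautifulSoup

html = """
<div id="btm"><div class="main">
  <script>var page = 1;</script>
  <script>var magnet_link = "magnet:?xt=urn:btih:...";</script>
</div></div>
"""

bs = BeautifulSoup(html, "html.parser")
first = bs.find(id="btm").find(class_="main").find("script")
# find_next_siblings("script") yields the following <script> tags in
# document order; [-1] selects the last one, which holds the magnet.
script = first.find_next_siblings("script")[-1].string
print(script)  # var magnet_link = "magnet:?xt=urn:btih:...";
```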
17 changes: 8 additions & 9 deletions anisearch/plugins/dmhy.py
@@ -16,15 +16,14 @@ class Dmhy(BasePlugin):
abstract = False

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
page = 1
params = {'keyword': keyword}

params = {'keyword': keyword, **extra_options}
if collected:
params['sort_id'] = "31"

@@ -33,18 +32,18 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
try:
html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
bs = BeautifulSoup(html, self._parser)
topic = bs.find(id="topic_list")
tbody = bs.find("tbody")

if not topic:
if not tbody:
break

for tr in topic.tbody.find_all("tr"):
for tr in tbody.find_all("tr"):
tds = tr.find_all("td")
release_time = tds[0].span.string
release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d %H:%M'))

title = tds[2].find_all("a")[-1].get_text(strip=True)
magnet = tds[3].find(class_="download-arrow").get("href")
magnet = tds[3].find(class_="download-arrow")["href"]
size = tds[4].string

log.debug(f"Successfully got: {title}")
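This commit also swaps several `.get("href")` calls for `tag["href"]` subscripting. The behavioral difference is worth noting: `.get` returns `None` for a missing attribute and the failure only surfaces later, while subscripting raises `KeyError` immediately inside the surrounding `try` block. A quick illustration:

```python
from bs4 import BeautifulSoup

tag = BeautifulSoup('<a class="download-arrow">dl</a>', "html.parser").a

print(tag.get("href"))  # None: a missing attribute passes silently
try:
    tag["href"]         # KeyError: the broken row fails fast
except KeyError as err:
    print("missing attribute:", err)
```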
18 changes: 8 additions & 10 deletions anisearch/plugins/kisssub.py
@@ -28,17 +28,15 @@ class Kisssub(BasePlugin):
abstract = False

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
prev_anime_title = ""
page = 1

params = {'keyword': keyword}
params = {'keyword': keyword, **extra_options}
if collected:
params['complete'] = 1

@@ -54,14 +52,14 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
if not tbody:
break

current_titles = [tr.find_all('td')[2].a.get_text(strip=True) for tr in tbody.find_all('tr')]
current_titles = [tr.find_all("td")[2].a.get_text(strip=True) for tr in tbody.find_all("tr")]
if not current_titles or current_titles[0] == prev_anime_title:
break

prev_anime_title = current_titles[0]

for tr in tbody.find_all('tr'):
tds = tr.find_all('td')
for tr in tbody.find_all("tr"):
tds = tr.find_all("td")
release_time = tds[0].get_text(strip=True)
try:
release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
@@ -70,13 +68,13 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
continue

title = tds[2].a.get_text(strip=True)
link = DOMAIN + tds[2].a['href']
link = DOMAIN + tds[2].a["href"]
size = tds[3].string

try:
link_html = get_html(link, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
link_bs = BeautifulSoup(link_html, self._parser)
script = link_bs.find(id='btm').find(class_='main').find('script').find_next_siblings('script')[-1].string
script = link_bs.find(id="btm").find(class_="main").find("script").find_next_siblings("script")[-1].string
magnet = get_magnet(script)
except (ValueError, AttributeError, Exception) as e:
log.error(f"Failed to get magnet link for {title}: {e}")
18 changes: 8 additions & 10 deletions anisearch/plugins/miobt.py
@@ -28,17 +28,15 @@ class Miobt(BasePlugin):
abstract = False

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
prev_anime_title = ""
page = 1

params = {'keyword': keyword}
params = {'keyword': keyword, **extra_options}
if collected:
params['complete'] = 1

@@ -54,14 +52,14 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
if not tbody:
break

current_titles = [tr.find_all('td')[2].a.get_text(strip=True) for tr in tbody.find_all('tr')]
current_titles = [tr.find_all("td")[2].a.get_text(strip=True) for tr in tbody.find_all("tr")]
if not current_titles or current_titles[0] == prev_anime_title:
break

prev_anime_title = current_titles[0]

for tr in tbody.find_all('tr'):
tds = tr.find_all('td')
for tr in tbody.find_all("tr"):
tds = tr.find_all("td")
release_time = tds[0].get_text(strip=True)
try:
release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y/%m/%d'))
@@ -70,13 +68,13 @@ def search(self, keyword: str, collected: bool = True, proxies: Optional[dict] =
continue

title = tds[2].a.get_text(strip=True)
link = DOMAIN + tds[2].a['href']
link = DOMAIN + tds[2].a["href"]
size = tds[3].string

try:
link_html = get_html(link, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
link_bs = BeautifulSoup(link_html, self._parser)
script = link_bs.find(id='btm').find(class_='main', id='').script.find_next_siblings('script')[-1].string
script = link_bs.find(id="btm").find(class_="main", id="").script.find_next_siblings("script")[-1].string
magnet = get_magnet(script)
except (ValueError, AttributeError, IndexError) as e:
log.error(f"Failed to get magnet link for {title}: {e}")
16 changes: 6 additions & 10 deletions anisearch/plugins/nyaa.py
@@ -16,15 +16,13 @@ class Nyaa(BasePlugin):
abstract = False

def __init__(self, parser: str = 'lxml', verify: bool = False, timefmt: str = r'%Y/%m/%d %H:%M') -> None:
self._parser = parser
self._verify = verify
self._timefmt = timefmt
super().__init__(parser, verify, timefmt)

def search(self, keyword: str, collected: bool = False, proxies: Optional[dict] = None,
system_proxy: bool = False) -> List[Anime]:
system_proxy: bool = False, **extra_options) -> List[Anime]:
animes: List[Anime] = []
page = 1
params = {'q': keyword, 'c': "1_0"}
params = {'q': keyword, 'c': "1_0", **extra_options}

if collected:
log.warning("Nyaa search does not support collection.")
@@ -35,7 +33,7 @@ def search(self, keyword: str, collected: bool = False, proxies: Optional[dict]
try:
html = get_html(url, verify=self._verify, proxies=proxies, system_proxy=system_proxy)
bs = BeautifulSoup(html, self._parser)
tbody = bs.find('tbody')
tbody = bs.find("tbody")

if not tbody or tbody.string == "\n":
break
@@ -44,14 +42,12 @@
tds = tr.find_all("td")
if len(tds) < 5:
continue

release_time = tds[4].string
release_time = time.strftime(self._timefmt, time.strptime(release_time, '%Y-%m-%d %H:%M'))

title = tds[1].a.get("title")
magnet_links = tds[2].find_all("a")
if len(magnet_links) < 2:
continue
magnet = magnet_links[1].get("href")
magnet = tds[2].find_all("a")[1].get("href")
size = tds[3].string

log.debug(f"Successfully got: {title}")
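Every plugin normalizes the source's native timestamp into the shared `timefmt` via `time.strptime`/`time.strftime`. A standalone sketch of the Nyaa case (the sample timestamp is hypothetical):

```python
import time

timefmt = r"%Y/%m/%d %H:%M"   # the plugins' default output format
raw = "2024-08-28 12:34"      # nyaa-style input, '%Y-%m-%d %H:%M'

parsed = time.strptime(raw, "%Y-%m-%d %H:%M")
print(time.strftime(timefmt, parsed))  # 2024/08/28 12:34
```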