Skip to content

Commit

Permalink
支持作品 id 为 BJ 或 VJ 开头的音声
Browse files Browse the repository at this point in the history
  • Loading branch information
yodhcn committed Apr 19, 2023
1 parent 9c8d392 commit a9d0b9c
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
2 changes: 1 addition & 1 deletion scaner/scaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def scan(self, root_path: str, _depth=0):
"""
if os.path.isdir(root_path): # 检查是否是文件夹
folder = os.path.basename(root_path)
rjcode = Dlsite.parse_rjcode(folder)
rjcode = Dlsite.parse_workno(folder)
if rjcode: # 检查文件夹名称中是否含RJ号
yield rjcode, root_path
elif _depth < self.__max_depth:
Expand Down
7 changes: 4 additions & 3 deletions scraper/dlsite.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ def _load_translations():

class Dlsite(object):
TRANSLATIONS: Final = _load_translations()
WORKNO_PATTERN: Final = re.compile(r'[RBV]J(\d{6}|\d{8})(?!\d+)')
RJCODE_PATTERN: Final = re.compile(r'RJ(\d{6}|\d{8})(?!\d+)')
RGCODE_PATTERN: Final = re.compile(r'RG(\d{5})(?!\d+)')
SRICODE_PATTERN: Final = re.compile(r'SRI(\d{10})(?!\d+)')

# 提取字符串中的 rjcode
# 提取字符串中的 workno
@staticmethod
def parse_rjcode(string: str):
match = Dlsite.RJCODE_PATTERN.search(string.upper())
def parse_workno(string: str):
match = Dlsite.WORKNO_PATTERN.search(string.upper())
if match:
return match.group()
else:
Expand Down
6 changes: 3 additions & 3 deletions scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,15 @@ def __parse_metadata(self, html: str, rjcode: str):

def scrape_metadata(self, rjcode: str):
rjcode = rjcode.upper()
if not Dlsite.RJCODE_PATTERN.fullmatch(rjcode):
if not Dlsite.WORKNO_PATTERN.fullmatch(rjcode):
raise ValueError
# html = self.__request_work_page(rjcode)
# metadata = self.__parse_metadata(html, rjcode)
metadata = self.__scrape_metadata_from_product_api(rjcode)
return metadata

def __scrape_metadata_from_product_api(self, rjcode: str):
product_info = self.__request_product_api(rjcode)
def __scrape_metadata_from_product_api(self, workno: str):
product_info = self.__request_product_api(workno)

metadata: WorkMetadata = {
'rjcode': product_info['workno'],
Expand Down

0 comments on commit a9d0b9c

Please sign in to comment.