diff --git a/scaner/scaner.py b/scaner/scaner.py index 90bad98..100223e 100644 --- a/scaner/scaner.py +++ b/scaner/scaner.py @@ -13,7 +13,7 @@ def scan(self, root_path: str, _depth=0): """ if os.path.isdir(root_path): # 检查是否是文件夹 folder = os.path.basename(root_path) - rjcode = Dlsite.parse_rjcode(folder) + rjcode = Dlsite.parse_workno(folder) if rjcode: # 检查文件夹名称中是否含RJ号 yield rjcode, root_path elif _depth < self.__max_depth: diff --git a/scraper/dlsite.py b/scraper/dlsite.py index e8cd9b7..5978acd 100644 --- a/scraper/dlsite.py +++ b/scraper/dlsite.py @@ -21,14 +21,15 @@ def _load_translations(): class Dlsite(object): TRANSLATIONS: Final = _load_translations() + WORKNO_PATTERN: Final = re.compile(r'[RBV]J(\d{6}|\d{8})(?!\d+)') RJCODE_PATTERN: Final = re.compile(r'RJ(\d{6}|\d{8})(?!\d+)') RGCODE_PATTERN: Final = re.compile(r'RG(\d{5})(?!\d+)') SRICODE_PATTERN: Final = re.compile(r'SRI(\d{10})(?!\d+)') - # 提取字符串中的 rjcode + # 提取字符串中的 workno @staticmethod - def parse_rjcode(string: str): - match = Dlsite.RJCODE_PATTERN.search(string.upper()) + def parse_workno(string: str): + match = Dlsite.WORKNO_PATTERN.search(string.upper()) if match: return match.group() else: diff --git a/scraper/scraper.py b/scraper/scraper.py index dd4006d..59fa067 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -156,15 +156,15 @@ def __parse_metadata(self, html: str, rjcode: str): def scrape_metadata(self, rjcode: str): rjcode = rjcode.upper() - if not Dlsite.RJCODE_PATTERN.fullmatch(rjcode): + if not Dlsite.WORKNO_PATTERN.fullmatch(rjcode): raise ValueError # html = self.__request_work_page(rjcode) # metadata = self.__parse_metadata(html, rjcode) metadata = self.__scrape_metadata_from_product_api(rjcode) return metadata - def __scrape_metadata_from_product_api(self, rjcode: str): - product_info = self.__request_product_api(rjcode) + def __scrape_metadata_from_product_api(self, workno: str): + product_info = self.__request_product_api(workno) metadata: WorkMetadata = { 'rjcode': product_info['workno'],