diff --git a/.gitignore b/.gitignore index 09c2e03..ae00707 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ data/result-*/ data/cache/ config.json .superpowers/ +autor-test/.env # Local-only files (Claude instructions) CLAUDE.md diff --git a/citationclaw/app/config_manager.py b/citationclaw/app/config_manager.py index 689a1cf..c26ef30 100644 --- a/citationclaw/app/config_manager.py +++ b/citationclaw/app/config_manager.py @@ -140,6 +140,9 @@ class AppConfig(BaseModel): # Semantic Scholar API Key (提升速率限制: 1 req/s → 10-100 req/s) s2_api_key: str = Field(default="", description="Semantic Scholar API Key(可选,大幅提升 PDF 下载成功率)") + # Web of Science Starter API Key (结构化作者提取) + wos_api_key: str = Field(default="", description="Web of Science Starter API Key(用于结构化作者提取,优先级高于 S2)") + # MinerU Cloud API mineru_api_token: str = Field(default="", description="MinerU Cloud Precision API Token(可选,用于大文件解析)") diff --git a/citationclaw/app/main.py b/citationclaw/app/main.py index 78bca2e..2d498df 100644 --- a/citationclaw/app/main.py +++ b/citationclaw/app/main.py @@ -133,6 +133,7 @@ class ConfigUpdate(BaseModel): dashboard_skip_citing_analysis: bool = False dashboard_model: str = "gemini-3-flash-preview-nothinking" s2_api_key: str = "" + wos_api_key: str = "" mineru_api_token: str = "" cdp_debug_port: int = 0 api_access_token: str = "" @@ -165,7 +166,6 @@ async def get_providers(): async def save_config(config: ConfigUpdate): try: data = config.model_dump() - # Debug: log MinerU token save status token = data.get("mineru_api_token", "") if token: print(f"[CONFIG] MinerU token 已保存: {token[:8]}...({len(token)} chars)") diff --git a/citationclaw/app/task_executor.py b/citationclaw/app/task_executor.py index 81f7d8f..f527687 100644 --- a/citationclaw/app/task_executor.py +++ b/citationclaw/app/task_executor.py @@ -88,6 +88,21 @@ async def _run_new_phase2_and_3( collector = MetadataCollector( s2_api_key=getattr(config, 's2_api_key', None), ) + + # 结构化作者提取:WOS→S2→MinerU(有 wos_api_key 时启用) + _wos_key = getattr(config, 'wos_api_key', '') or '' + _s2_key_for_fetcher = getattr(config, 's2_api_key', '') or '' + from citationclaw.core.structured_author_fetcher import StructuredAuthorFetcher + structured_fetcher: Optional[StructuredAuthorFetcher] = None + if _wos_key: + structured_fetcher = StructuredAuthorFetcher( + wos_api_key=_wos_key, + s2_api_key=_s2_key_for_fetcher, + log_callback=self.log_manager.info, + ) + self.log_manager.info(f"📋 WOS 结构化作者提取已启用(WOS + S2 双源融合)") + else: + self.log_manager.info("⚪ 未配置 WOS Key,使用默认 S2+OpenAlex 流程") self_cite_detector = SelfCitationDetector() prefilter = ScholarPreFilter() @@ -173,11 +188,50 @@ async def _fetch_one(idx: int, paper: dict, canonical: str): metadata = cached api_hits += 1 else: - # S2-first: search by title, then by URL if title miss - metadata = await collector.collect(title, paper_url=paper_link) + # WOS→S2→OpenAlex 结构化提取(有 wos_api_key 时优先) + wos_authors = [] + wos_source = "" + if structured_fetcher: + try: + wos_authors, wos_source = await structured_fetcher.fetch(title) + except Exception: + wos_authors = [] + + if wos_authors: + # WOS/S2 结构化结果直接使用,跳过 collector + metadata = { + "title": title, + "doi": "", "s2_id": "", "arxiv_id": "", + "year": paper.get("paper_year"), + "cited_by_count": 0, "influential_citation_count": 0, + "pdf_url": "", "oa_pdf_url": "", "venue": "", + "authors": wos_authors, + "sources": [wos_source], + } + else: + # Fallback: S2-first via collector + metadata = await collector.collect(title, paper_url=paper_link) + if metadata: 
await metadata_cache.update(metadata.get("doi", ""), title, metadata) api_queries += 1 + + # ── DEBUG: per-paper author detail ── + if metadata: + _src = ",".join(metadata.get("sources", [])) or "?" + _authors = metadata.get("authors", []) + self.log_manager.info( + f" ┌─[{_src}] {title[:52]}" + ) + for _a in _authors[:10]: + _n = _a.get("name", "?") + _af = (_a.get("affiliation", "") or "—")[:45] + _afsrc = f" [{_a['affiliation_source']}]" if _a.get("affiliation_source") else "" + self.log_manager.info(f" │ {_n} | {_af}{_afsrc}") + if len(_authors) > 10: + self.log_manager.info(f" │ …共 {len(_authors)} 位") + self.log_manager.info(" └─") + results_slots[idx] = metadata except Exception as e: # Don't let one paper's API failure crash the entire batch @@ -267,6 +321,36 @@ async def _fetch_one(idx: int, paper: dict, canonical: str): f"Phase 2 完成: API找到 {api_found} / GS兜底 {gs_fallback_count} / " f"缓存 {api_hits} / 共 {len(records_data)} 篇" ) + + # ── DEBUG: save per-paper author breakdown to JSON ── + try: + import json as _dbg_json + _debug_records = [] + for _i, (_paper, _meta, _canon) in enumerate(records_data): + _debug_records.append({ + "idx": _i + 1, + "title": _paper.get("paper_title", ""), + "canonical": _canon, + "source": ",".join((_meta or {}).get("sources", [])) or "gs_fallback", + "authors": [ + { + "name": _a.get("name", ""), + "affiliation": _a.get("affiliation", ""), + "affiliation_source": _a.get("affiliation_source", ""), + "s2_id": _a.get("s2_id", ""), + } + for _a in (_meta or {}).get("authors", []) + ], + }) + _debug_file = result_dir / f"{output_prefix}_author_debug.json" + _debug_file.write_text( + _dbg_json.dumps(_debug_records, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + self.log_manager.info(f"📄 作者调试文件: {_debug_file}") + except Exception as _e: + self.log_manager.warning(f"调试文件保存失败: {_e}") + if gs_fallback_count > len(records_data) * 0.5: self.log_manager.warning( f"⚠ {gs_fallback_count} 篇论文 API 未找到(S2/OpenAlex 均未收录)," diff --git a/citationclaw/config/prompts/pdf_author_extract.txt b/citationclaw/config/prompts/pdf_author_extract.txt index 21e45ef..14017c7 100644 --- a/citationclaw/config/prompts/pdf_author_extract.txt +++ b/citationclaw/config/prompts/pdf_author_extract.txt @@ -1,14 +1,15 @@ -以下是一篇学术论文首页的文本块(来自 PDF 解析,按排版顺序排列): +以下是一篇学术论文的文本块(来自 PDF 解析,按排版顺序排列,可能包含多页内容): {first_page_text} 请从中提取所有作者及其单位信息。 要求: -1. 只提取论文首页中明确列出的作者和单位 -2. 注意区分作者名和其他文本(如标题、摘要、关键词) +1. 若首页仅显示 team 名称(如 "ABC Team", "Research Group XYZ")而非具体人名,请在后续文本块中查找完整的作者个人名单 +2. 只提取论文中明确列出的作者和单位,不允许捏造,不允许混入参考文献中的作者 3. 如果有对应的邮箱,也请提取 4. 机构名请用论文中写的原文(不要翻译) +5. 姓名请使用完整格式(名 姓),不要使用逗号分隔格式 以 JSON 数组格式输出: [ diff --git a/citationclaw/core/affiliation_validator.py b/citationclaw/core/affiliation_validator.py index 71bf284..7c4d932 100644 --- a/citationclaw/core/affiliation_validator.py +++ b/citationclaw/core/affiliation_validator.py @@ -1,14 +1,8 @@ -"""Cross-validate author affiliations between API data and PDF-extracted data. 
- -Strategy: -- Match authors by name (fuzzy, handles Chinese/English variants) -- PDF affiliation = publication-time truth (preferred) -- API affiliation = current affiliation (may have changed) -- Merge: PDF > API Author-level > API paper-level > empty -""" import re from typing import List, Optional +from citationclaw.core.author_name_utils import format_wos_name, name_keys + class AffiliationValidator: """Cross-validate and merge author data from API and PDF sources.""" @@ -26,7 +20,7 @@ def validate(self, api_authors: List[dict], pdf_authors: List[dict]) -> List[dic if not pdf_authors: return api_authors if not api_authors: - return [{"name": a["name"], "affiliation": a.get("affiliation", ""), + return [{"name": format_wos_name(a["name"]) or a["name"], "affiliation": a.get("affiliation", ""), "country": "", "affiliation_source": "pdf"} for a in pdf_authors] @@ -79,7 +73,7 @@ def validate(self, api_authors: List[dict], pdf_authors: List[dict]) -> List[dic if not (pdf_keys & matched_pdf_names): pdf_affil = pdf_a.get("affiliation", "") merged.append({ - "name": pdf_a["name"], + "name": format_wos_name(pdf_a["name"]) or pdf_a["name"], "affiliation": pdf_affil, "email": pdf_a.get("email", ""), "country": self._infer_country(pdf_affil), @@ -115,7 +109,8 @@ def _infer_country(affiliation: str) -> str: if any(k in aff for k in cn_kw): return "CN" # US institutions - us_kw = ["mit ", "m.i.t", "stanford", "harvard", "berkeley", + us_kw = ["mit", "m.i.t", "massachusetts institute of technology", + "stanford", "harvard", "berkeley", "carnegie mellon", "cmu", "princeton", "yale", "columbia university", "cornell", "ucla", "caltech", "university of california", "university of michigan", @@ -163,18 +158,18 @@ def _infer_country(affiliation: str) -> str: @staticmethod def _name_keys(name: str) -> set: - """Extract all name variants for matching (same logic as scholar dedup).""" keys = set() cleaned = name.strip() if not cleaned: return keys - # Split on parentheses and slashes + parts = re.split(r'[()()//]', cleaned) for part in parts: - p = part.strip().strip(',,、').strip() - if p and len(p) >= 2: - keys.add(p.lower()) + part = part.strip().strip(',,、').strip() + if part and len(part) >= 2: + keys.update(name_keys(part)) + base = re.sub(r'[((].*?[))]', '', cleaned).strip() if base and len(base) >= 2: - keys.add(base.lower()) + keys.update(name_keys(base)) return keys diff --git a/citationclaw/core/author_name_utils.py b/citationclaw/core/author_name_utils.py new file mode 100644 index 0000000..9303794 --- /dev/null +++ b/citationclaw/core/author_name_utils.py @@ -0,0 +1,261 @@ +import re +import unicodedata + + +SPECIAL_CHAR_MAP = str.maketrans( + { + "Ł": "L", + "ł": "l", + "Đ": "D", + "đ": "d", + "Ø": "O", + "ø": "o", + } +) + + +def strip_accents(text: str) -> str: + normalized = unicodedata.normalize("NFKD", (text or "").translate(SPECIAL_CHAR_MAP)) + return "".join(ch for ch in normalized if not unicodedata.combining(ch)) + + +def _clean_token(text: str) -> str: + text = strip_accents(text or "") + text = text.lower() + text = re.sub(r"[^a-z0-9]+", " ", text) + return " ".join(text.split()) + + +def split_name_parts(name: str) -> tuple[str, list[str]]: + cleaned = strip_accents(name or "") + cleaned = re.sub(r"\s+", " ", cleaned).strip().strip(",") + if not cleaned: + return "", [] + + if "," in cleaned: + family, given = [part.strip() for part in cleaned.split(",", 1)] + given_parts = [part for part in re.split(r"\s+", given) if part] + return family, given_parts + + parts = [part for part in 
re.split(r"\s+", cleaned) if part] + if len(parts) == 1: + return parts[0], [] + return parts[-1], parts[:-1] + + +def format_wos_name(name: str) -> str: + family, given_parts = split_name_parts(name) + if not family: + return "" + family = re.sub(r"\s+", " ", family).strip() + given = " ".join(given_parts).strip() + return f"{family}, {given}".strip().strip(",") + + +def display_to_full_name(name: str) -> str: + family, given_parts = split_name_parts(name) + if not family: + return "" + if not given_parts: + return family + return " ".join([*given_parts, family]).strip() + + +def name_keys(name: str) -> set[str]: + full_name = display_to_full_name(name) + normalized_full = _clean_token(full_name) + if not normalized_full: + return set() + + parts = normalized_full.split() + family = parts[-1] + given = parts[:-1] + + keys = { + normalized_full, + family, + _clean_token(format_wos_name(name)), + } + + if given: + keys.add(f"{given[0]} {family}") + keys.add(f"{family} {given[0]}") + keys.add(f"{given[0][0]} {family}") + keys.add(f"{family} {given[0][0]}") + keys.add(f"{family} {' '.join(given)}") + if len(given) > 1: + initials = " ".join(part[0] for part in given if part) + if initials: + keys.add(f"{initials} {family}") + keys.add(f"{family} {initials}") + + return {key for key in keys if key} + + +# --------------------------------------------------------------------------- +# Enhanced matching (handles WOS abbreviated initials, inverted format, etc.) +# --------------------------------------------------------------------------- + +def _normalize_allcaps(name: str) -> str: + """'FU, DARWIN Y' → 'Fu, Darwin Y'. Only triggers when entire name is uppercase.""" + stripped = re.sub(r"[,.\s]", "", name) + if stripped.isalpha() and stripped.isupper() and len(stripped) > 2: + return name.title() + return name + + +def _is_initials_token(s: str) -> bool: + """Return True if s looks like initials: 1–4 uppercase-only alpha chars (e.g. 'K', 'KM', 'CA').""" + s = re.sub(r"[\s.]+", "", s) + return bool(s) and s.isalpha() and s.upper() == s and 1 <= len(s) <= 4 + + +def _expand_initials(token: str) -> list[str]: + """Split a given-name token into a list of single uppercase initials. + + 'KM' → ['K', 'M'] (WOS concatenated syllable initials for Chinese names) + 'K M' → ['K', 'M'] + 'K.' → ['K'] + 'Kaiming' → ['K'] (full name → just first char) + 'Christopher' → ['C'] + """ + token = token.strip().rstrip(".") + parts = [p.rstrip(".") for p in re.split(r"[\s.]+", token) if p.rstrip(".")] + if not parts: + return [] + if len(parts) > 1: + return [p[0].upper() for p in parts if p] + token = parts[0] + # Concatenated uppercase initials like "KM", "XY", "CA" + if token.isupper() and token.isalpha() and 2 <= len(token) <= 4: + return list(token) + return [token[0].upper()] + + +def _parse_for_match(name: str) -> tuple[str, list[str]]: + """Parse any name format → (family_normalized, [given_initials]).""" + name = _normalize_allcaps(name.strip()) + family, given_parts = split_name_parts(name) + + if not family: + return "", [] + + given_str = " ".join(given_parts) + if _is_initials_token(family) and given_str and len(given_str) > 3: + family, given_parts = given_str, [family] + + family_norm = _clean_token(strip_accents(family)) + + initials: list[str] = [] + for part in given_parts: + initials.extend(_expand_initials(part)) + + return family_norm, initials + + +def _parse_first_given(name: str) -> tuple[str, str]: + """Parse any name format → (family_norm, first_given_norm). 
+
+    Preserves the full first given-name token (not reduced to an initial) so that
+    names_match can distinguish 'Christopher' from 'Carol' even though both start with C.
+
+    'He, Kaiming'             → ('he', 'kaiming')
+    'He, KM'                  → ('he', 'km')         ← len<=2 → treated as initials downstream
+    'Eger, T'                 → ('eger', 't')        ← len=1 → initials
+    'H, Melchinger'           → ('melchinger', 'h')  ← inverted
+    'FU, DARWIN Y'            → ('fu', 'darwin')
+    'Manning, Christopher D.' → ('manning', 'christopher')
+    """
+    name = _normalize_allcaps(name.strip())
+    family, given_parts = split_name_parts(name)
+
+    if not family:
+        return "", ""
+
+    given_str = " ".join(given_parts)
+    if _is_initials_token(family) and given_str and len(given_str) > 3:
+        family, given_parts = given_str, [family]
+
+    family_norm = _clean_token(strip_accents(family))
+    first_given = _clean_token(strip_accents(given_parts[0])) if given_parts else ""
+    return family_norm, first_given
+
+
+def names_match(name_a: str, name_b: str) -> bool:
+    """Return True if two name strings likely refer to the same person.
+
+    Matching rules, applied in order:
+
+    1. Exact match after normalization.
+    2. Family name must be identical.
+    3. If either side has no given-name info → family match is sufficient.
+    4. If either first given token is 'initials' (len ≤ 2, e.g. 'k', 'km', 't'):
+       → first character must match.
+       'He, KM' matches 'He, Kaiming'   (k == k)
+       'Eger, T' matches 'Eger, Thomas' (t == t)
+    5. Both sides have full given names (len ≥ 3):
+       → first 3 characters must match.
+       'Manning, Christopher' does NOT match 'Manning, Carol' (chr ≠ car)
+       'He, Kaiming' matches 'He, Kai'  (kai == kai, prefix of length 3)
+       'Li, Wei' does NOT match 'Li, Wenbo' (wei ≠ wen)
+
+    Rule 5 prevents two distinct people with the same surname and same first
+    initial from being incorrectly merged.
+    """
+    if not name_a or not name_b:
+        return False
+
+    # Rule 1
+    if _clean_token(strip_accents(name_a)) == _clean_token(strip_accents(name_b)):
+        return True
+
+    fam_a, given_a = _parse_first_given(name_a)
+    fam_b, given_b = _parse_first_given(name_b)
+
+    # Rule 2
+    if not fam_a or not fam_b or fam_a != fam_b:
+        return False
+
+    # Rule 3
+    if not given_a or not given_b:
+        return True
+
+    # Rules 4 & 5: initials = len ≤ 2 after clean_token
+    is_init_a = len(given_a) <= 2
+    is_init_b = len(given_b) <= 2
+
+    if is_init_a or is_init_b:
+        # Rule 4: at least one abbreviated → first character must match
+        return given_a[0] == given_b[0]
+
+    # Rule 5: both full names → 3-char prefix must match
+    prefix = min(len(given_a), len(given_b), 3)
+    return given_a[:prefix] == given_b[:prefix]
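Worked examples of the rule set above (an editorial sketch, not part of the patch): assuming `author_name_utils` is importable as committed here, each assertion follows directly from the numbered rules in the docstring.

```python
from citationclaw.core.author_name_utils import names_match

# Rule 4: an abbreviated given name matches on its first initial
assert names_match("He, KM", "Kaiming He")            # k == k
assert names_match("Eger, T", "Eger, Thomas")         # t == t
# Rule 5: two full given names must share a 3-char prefix
assert names_match("He, Kaiming", "He, Kai")          # kai == kai
assert not names_match("Manning, Christopher D.", "Manning, Carol")  # chr != car
assert not names_match("Li, Wei", "Li, Wenbo")        # wei != wen
# All-caps WOS records are normalized before comparison
assert names_match("FU, DARWIN Y", "Fu, Darwin")
# Rule 2: family names must be identical
assert not names_match("He, Kaiming", "Hu, Kaiming")
```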
+
+
+def to_natural_name(name: str) -> str:
+    """Convert any name format to natural 'First Last' (no comma).
+
+    'He, Kaiming'             → 'Kaiming He'
+    'He, KM'                  → 'KM He'
+    'Manning, Christopher D.' → 'Christopher D. Manning'
+    'H, Melchinger'           → 'H Melchinger'  (inverted detected: H is initial, Melchinger is surname)
+    'FU, DARWIN Y'            → 'Darwin Y Fu'   (all-caps normalized)
+    'Kaiming He'              → 'Kaiming He'    (already natural, no change)
+    """
+    name = _normalize_allcaps(name.strip())
+    family, given_parts = split_name_parts(name)
+
+    if not family:
+        return name
+
+    # Inverted detection
+    given_str = " ".join(given_parts)
+    if _is_initials_token(family) and given_str and len(given_str) > 3:
+        family, given_parts = given_str, [family]
+
+    if not given_parts:
+        return family
+
+    given = " ".join(given_parts)
+    return f"{given} {family}"
diff --git a/citationclaw/core/author_searcher.py b/citationclaw/core/author_searcher.py
index a19d270..33df9ea 100644
--- a/citationclaw/core/author_searcher.py
+++ b/citationclaw/core/author_searcher.py
@@ -5,6 +5,7 @@
 from openai import AsyncOpenAI
 import httpx
 from citationclaw.core.author_cache import AuthorInfoCache
+from citationclaw.core.structured_author_fetcher import StructuredAuthorFetcher
 
 
 class AuthorSearcher:
@@ -27,6 +28,9 @@ def __init__(
         target_paper_authors: Optional[str] = None,
         author_cache: Optional[AuthorInfoCache] = None,
         cancel_event: Optional[asyncio.Event] = None,
+        wos_api_key: str = "",
+        s2_api_key: str = "",
+        mineru_api_token: str = "",
     ):
         """
         作者学术信息搜索器
@@ -91,6 +95,27 @@ def __init__(
         self.author_cache: Optional[AuthorInfoCache] = author_cache
         self.cancel_event: Optional[asyncio.Event] = cancel_event
 
+        # 结构化作者提取(WOS→S2→MinerU),有 wos_api_key 或 s2_api_key 时启用
+        self.structured_fetcher: Optional[StructuredAuthorFetcher] = None
+        if wos_api_key or s2_api_key:
+            self.structured_fetcher = StructuredAuthorFetcher(
+                wos_api_key=wos_api_key,
+                s2_api_key=s2_api_key,
+                mineru_api_token=mineru_api_token,
+                openai_api_key=api_key,
+                openai_base_url=base_url,
+                model=model,
+                log_callback=log_callback,
+            )
+            sources = []
+            if wos_api_key:
+                sources.append("WOS")
+            if s2_api_key:
+                sources.append("S2")
+            log_callback(f"📋 结构化作者提取已启用:{'→'.join(sources)}→MinerU")
+        else:
+            log_callback("⚪ 结构化作者提取未启用(未配置 WOS/S2 API Key)")
+
         # 自引检测 Prompt(使用轻量级模型)
         self.self_citation_check_prompt = (
             "【任务】判断一篇施引论文是否为自引。\n"
@@ -378,6 +403,30 @@ async def _search_single_paper(
             'Authors_with_Profile': str(paper_content['authors']),
         }
 
+        # ── 结构化作者提取(WOS→S2→MinerU)─────────────────────────────
+        if self.structured_fetcher:
+            doi = paper_content.get('doi', '')
+            pdf_path = paper_content.get('pdf_path', None)
+            self.log_callback(f"  🔍 [结构化] 查询作者: {paper_title[:50]}...")
+            try:
+                struct_authors, struct_source = await self.structured_fetcher.fetch(
+                    paper_title, doi=doi, pdf_path=pdf_path
+                )
+                record_dict['Paper_Authors'] = struct_authors
+                record_dict['Paper_Authors_Source'] = struct_source
+                if struct_authors:
+                    self.log_callback(
+                        f"  📋 [{struct_source}] 找到 {len(struct_authors)} 位作者: {paper_title[:40]}..."
+                    )
+                else:
+                    self.log_callback(
+                        f"  ⚪ [结构化] 未找到作者(WOS/S2 均无收录): {paper_title[:40]}..."
+ ) + except Exception as exc: + self.log_callback(f" ⚠️ 结构化作者提取失败: {exc}") + record_dict['Paper_Authors'] = [] + record_dict['Paper_Authors_Source'] = '' + # ── 查询缓存(取出已有字段供后续各步使用)──────────────────────── cached = (await self.author_cache.get(paper_link, paper_title)) if self.author_cache else None diff --git a/citationclaw/core/http_utils.py b/citationclaw/core/http_utils.py index 88a542a..14a51f7 100644 --- a/citationclaw/core/http_utils.py +++ b/citationclaw/core/http_utils.py @@ -18,14 +18,26 @@ def _detect_http_proxy() -> Optional[str]: return None +def _detect_ca_bundle() -> Optional[str]: + """Return custom CA bundle path if set via any of the common env vars.""" + for var in ["CA_BUNDLE_PATH", "SSL_CERT_FILE", "REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE"]: + val = os.environ.get(var, "").strip().strip('"').strip("'") + if val and os.path.exists(val): + return val + return None + + def make_async_client(timeout: float = 30.0) -> httpx.AsyncClient: - """Create an httpx AsyncClient with auto-detected proxy. + """Create an httpx AsyncClient with auto-detected proxy and CA bundle. Uses HTTP proxy if available, otherwise direct connection. + Respects CA_BUNDLE_PATH / SSL_CERT_FILE for corporate TLS interception. """ proxy = _detect_http_proxy() + ca_bundle = _detect_ca_bundle() return httpx.AsyncClient( proxy=proxy, timeout=timeout, + verify=ca_bundle if ca_bundle else True, headers={"User-Agent": "CitationClaw/2.0 (academic research tool)"}, ) diff --git a/citationclaw/core/pdf_author_extractor.py b/citationclaw/core/pdf_author_extractor.py index 7a7fe42..b8c0f7c 100644 --- a/citationclaw/core/pdf_author_extractor.py +++ b/citationclaw/core/pdf_author_extractor.py @@ -1,4 +1,4 @@ -"""Extract authors and affiliations from PDF first page using lightweight LLM.""" +"""Extract authors and affiliations from PDF content using lightweight LLM.""" import json from typing import List, Optional @@ -7,7 +7,7 @@ class PDFAuthorExtractor: - """Extract authors + affiliations from PDF first page via lightweight LLM.""" + """Extract authors + affiliations from PDF full content via lightweight LLM.""" def __init__(self, api_key: str = "", base_url: str = "", model: str = ""): self._api_key = api_key @@ -15,17 +15,17 @@ def __init__(self, api_key: str = "", base_url: str = "", model: str = ""): self._model = model self._prompt_loader = PromptLoader() - async def extract(self, first_page_blocks: list) -> List[dict]: - """Send first-page text blocks to lightweight LLM, return author list. + async def extract(self, blocks: list) -> List[dict]: + """Send PDF text blocks to lightweight LLM, return author list. 
Returns: [{"name": "...", "affiliation": "...", "email": "..."}] """ - if not self._api_key or not first_page_blocks: + if not self._api_key or not blocks: return [] # Build text from blocks lines = [] - for i, b in enumerate(first_page_blocks[:20]): + for i, b in enumerate(blocks): text = b.get("text", "").strip() if isinstance(b, dict) else str(b).strip() if text: lines.append(f"[{i}] {text}") diff --git a/citationclaw/core/pdf_mineru_parser.py b/citationclaw/core/pdf_mineru_parser.py index 21bc248..d69914f 100644 --- a/citationclaw/core/pdf_mineru_parser.py +++ b/citationclaw/core/pdf_mineru_parser.py @@ -115,6 +115,20 @@ def parse(self, pdf_path: Path, paper_key: str) -> Optional[dict]: return result return self._parse_pymupdf(pdf_path, output_dir) + @staticmethod + def _write_meta(output_dir: Path, source: str, parsed_at: str) -> None: + (output_dir / "meta.json").write_text( + json.dumps( + { + "source": source, + "parsed_at": parsed_at, + }, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) + @staticmethod def _get_page_count(pdf_path: Path) -> int: """Quick page count via PyMuPDF (no full parse).""" @@ -150,14 +164,26 @@ async def parse_async(self, pdf_path: Path, paper_key: str) -> Optional[dict]: - All cloud fail: Local MinerU (serial) → PyMuPDF """ output_dir = self._output_base / paper_key + if self._log: + self._log( + f" [MinerU] 开始解析 key={paper_key} file={pdf_path.name}" + ) # 1. Cache cached = self._load_cached(output_dir) if cached: + if self._log: + self._log( + f" [MinerU] 命中缓存 source={cached.get('source','?')} dir={output_dir}" + ) return cached file_size = pdf_path.stat().st_size if pdf_path.exists() else 0 page_count = self._get_page_count(pdf_path) + if self._log: + self._log( + f" [MinerU] 文件信息 size={file_size // 1024}KB pages={page_count}" + ) # Skip oversized PDFs (>100MB or >200 pages) if file_size > 100 * 1024 * 1024 or page_count > 200: @@ -169,22 +195,30 @@ async def parse_async(self, pdf_path: Path, paper_key: str) -> Optional[dict]: # 2. Cloud routing — try both tiers with smart fallthrough if is_large and self._mineru_token and not self._cloud_precision_failed: + if self._log: + self._log(" [MinerU] 路由: large PDF -> Cloud Precision first") # Large + has token → Precision first result = await self._parse_cloud_precision(pdf_path, output_dir) if result: return result # Precision failed → try Agent as fallback (may reject on size) if not self._cloud_agent_disabled: + if self._log: + self._log(" [MinerU] Cloud Precision 未成功,尝试 Cloud Agent") result = await self._parse_cloud_agent(pdf_path, output_dir) if result: return result elif is_large and not self._mineru_token: + if self._log: + self._log(" [MinerU] 路由: large PDF without token -> try Cloud Agent") # Large + no token → Agent will likely reject, try anyway then local if not self._cloud_agent_disabled: result = await self._parse_cloud_agent(pdf_path, output_dir) if result: return result else: + if self._log: + self._log(" [MinerU] 路由: small PDF -> Cloud Agent first") # Small file → Agent first if not self._cloud_agent_disabled: result = await self._parse_cloud_agent(pdf_path, output_dir) @@ -192,12 +226,16 @@ async def parse_async(self, pdf_path: Path, paper_key: str) -> Optional[dict]: return result # Agent failed → try Precision if token available if self._mineru_token and not self._cloud_precision_failed: + if self._log: + self._log(" [MinerU] Cloud Agent 未成功,尝试 Cloud Precision") result = await self._parse_cloud_precision(pdf_path, output_dir) if result: return result # 3. 
Local MinerU (serialized — only 1 at a time) if self._has_local_mineru and not self._local_mineru_failed: + if self._log: + self._log(" [MinerU] 尝试本地 MinerU") async with _local_mineru_lock: result = await asyncio.to_thread( self._parse_local_mineru, pdf_path, output_dir @@ -206,18 +244,34 @@ async def parse_async(self, pdf_path: Path, paper_key: str) -> Optional[dict]: return result # 4. PyMuPDF fallback + if self._log: + self._log(" [MinerU] 所有 MinerU 路径未成功,回退到 PyMuPDF") return self._parse_pymupdf(pdf_path, output_dir) # ── Cloud Agent API (free, ≤10MB/≤20 pages) ────────────────────────── @staticmethod def _make_direct_client(timeout: float = 120.0): - """Create httpx client WITHOUT proxy — MinerU is a China service, no proxy needed.""" + """Create direct httpx client while still honoring explicit CA bundle env vars. + + We intentionally keep ``trust_env=False`` to avoid inheriting system proxies + such as socks5/ALL_PROXY, but Cloudflare Gateway / enterprise TLS setups often + require a custom CA bundle passed via ``SSL_CERT_FILE`` or + ``REQUESTS_CA_BUNDLE``. httpx ignores those env vars when ``trust_env=False``, + so we wire them into ``verify`` manually. + """ import httpx + ca_bundle = None + for var in ["CA_BUNDLE_PATH", "SSL_CERT_FILE", "REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE"]: + val = (os.environ.get(var) or "").strip().strip('"').strip("'") + if val and os.path.exists(val): + ca_bundle = val + break return httpx.AsyncClient( proxy=None, trust_env=False, # Ignore ALL_PROXY / socks5 env vars timeout=timeout, + verify=ca_bundle if ca_bundle else True, headers={"User-Agent": "CitationClaw/2.0"}, ) @@ -289,14 +343,15 @@ async def _parse_cloud_agent(self, pdf_path: Path, output_dir: Path) -> Optional # Save to cache output_dir.mkdir(parents=True, exist_ok=True) (output_dir / "full.md").write_text(md_text, encoding="utf-8") - + parsed_at = datetime.now(timezone.utc).isoformat() + self._write_meta(output_dir, "mineru_cloud_agent", parsed_at) return { "content_list": [], "full_md": md_text, "first_page_blocks": self._md_to_first_page(md_text), "references_md": self._extract_references(md_text), "source": "mineru_cloud_agent", - "parsed_at": datetime.now(timezone.utc).isoformat(), + "parsed_at": parsed_at, } except Exception as e: err = str(e)[:80] @@ -463,6 +518,8 @@ def _extract_from_zip(self, zip_bytes: bytes, output_dir: Path) -> Optional[dict if not md_text: return None + parsed_at = datetime.now(timezone.utc).isoformat() + self._write_meta(output_dir, "mineru_cloud_precision", parsed_at) return { "content_list": content_list, "full_md": md_text, @@ -472,7 +529,7 @@ def _extract_from_zip(self, zip_bytes: bytes, output_dir: Path) -> Optional[dict ), "references_md": self._extract_references(md_text), "source": "mineru_cloud_precision", - "parsed_at": datetime.now(timezone.utc).isoformat(), + "parsed_at": parsed_at, } # ── Local MinerU (needs GPU + models) ───────────────────────────────── @@ -520,6 +577,8 @@ def _parse_local_mineru(self, pdf_path: Path, output_dir: Path) -> Optional[dict # First successful local parse → symlink models to project for next time self._ensure_project_model_link() + parsed_at = datetime.now(timezone.utc).isoformat() + self._write_meta(output_dir, "mineru_local", parsed_at) return { "content_list": content_list, "full_md": md_text, @@ -528,7 +587,7 @@ def _parse_local_mineru(self, pdf_path: Path, output_dir: Path) -> Optional[dict ), "references_md": self._extract_references(md_text), "source": "mineru_local", - "parsed_at": 
datetime.now(timezone.utc).isoformat(), + "parsed_at": parsed_at, } except Exception as e: error_msg = str(e) @@ -594,13 +653,15 @@ def _parse_pymupdf(self, pdf_path: Path, output_dir: Path) -> Optional[dict]: output_dir.mkdir(parents=True, exist_ok=True) (output_dir / "full.md").write_text(full_text, encoding="utf-8") + parsed_at = datetime.now(timezone.utc).isoformat() + self._write_meta(output_dir, "pymupdf", parsed_at) return { "content_list": [], "full_md": full_text, "first_page_blocks": self._md_to_first_page(first_page), "references_md": self._extract_references(full_text), "source": "pymupdf", - "parsed_at": datetime.now(timezone.utc).isoformat(), + "parsed_at": parsed_at, } except Exception: return None @@ -619,10 +680,17 @@ def _load_cached(self, output_dir: Path) -> Optional[dict]: try: md_text = md_path.read_text(encoding="utf-8") content_list = [] + meta_path = output_dir / "meta.json" + source = "" + parsed_at = datetime.now(timezone.utc).isoformat() for f in output_dir.rglob("*content_list.json"): with open(f) as fh: content_list = json.load(fh) break + if meta_path.exists(): + meta = json.loads(meta_path.read_text(encoding="utf-8")) + source = meta.get("source", "") + parsed_at = meta.get("parsed_at", parsed_at) return { "content_list": content_list, "full_md": md_text, @@ -631,8 +699,8 @@ def _load_cached(self, output_dir: Path) -> Optional[dict]: if content_list else self._md_to_first_page(md_text) ), "references_md": self._extract_references(md_text), - "source": "mineru" if content_list else "pymupdf", - "parsed_at": datetime.now(timezone.utc).isoformat(), + "source": source or ("mineru" if content_list else "pymupdf"), + "parsed_at": parsed_at, } except Exception: return None diff --git a/citationclaw/core/scraper.py b/citationclaw/core/scraper.py index 3fcbd5f..d04f311 100644 --- a/citationclaw/core/scraper.py +++ b/citationclaw/core/scraper.py @@ -283,12 +283,12 @@ def _parse_citation_count(self, html: str) -> int: from bs4 import BeautifulSoup soup = BeautifulSoup(html, 'html.parser') stat_patterns = [ - r'找到约\s*([\d,]+)\s*条', - r'获得\s*([\d,]+)\s*条', - r'约\s*([\d,]+)\s*条', - r'([\d,]+)\s*条结果', - r'About\s+([\d,]+)\s+results?', - r'^([\d,]+)\s+results?\b', + r'找到约\s*([\d,.\s]+?)\s*条', + r'获得\s*([\d,.\s]+?)\s*条', + r'约\s*([\d,.\s]+?)\s*条', + r'([\d,.\s]+?)\s*条结果', + r'About\s+([\d,.\s]+?)\s+results?', + r'^([\d,.\s]+?)\s+results?\b', ] candidates = [] id_elem = soup.find(id='gs_ab_mdw') @@ -301,25 +301,29 @@ def _parse_citation_count(self, html: str) -> int: for pat in stat_patterns: m = re.search(pat, stat_text, re.IGNORECASE) if m: - citation_count = int(m.group(1).replace(',', '')) - self.log_callback(f"🔍 结果统计元素文本: {stat_text[:100]}") - return citation_count + raw = re.sub(r'[,.\s]', '', m.group(1).strip()) + if raw.isdigit(): + citation_count = int(raw) + self.log_callback(f"🔍 结果统计元素文本: {stat_text[:100]}") + return citation_count except Exception: pass # 第二步:对整个 HTML 做正则(数字可能含 标签) patterns = [ - r'找到约\s*(?:<[^>]+>)?\s*([\d,]+)\s*(?:<[^>]+>)?\s*条', - r'获得\s*(?:<[^>]+>)?\s*([\d,]+)\s*(?:<[^>]+>)?\s*条', - r'约\s*(?:<[^>]+>)?\s*([\d,]+)\s*(?:<[^>]+>)?\s*条结果', - r'About\s+(?:<[^>]+>)?\s*([\d,]+)\s+results?', - r'([\d,]+)\s*条结果', - r'>(\d[\d,]*)\s+results?\b', + r'找到约\s*(?:<[^>]+>)?\s*([\d,.\s]+?)\s*(?:<[^>]+>)?\s*条', + r'获得\s*(?:<[^>]+>)?\s*([\d,.\s]+?)\s*(?:<[^>]+>)?\s*条', + r'约\s*(?:<[^>]+>)?\s*([\d,.\s]+?)\s*(?:<[^>]+>)?\s*条结果', + r'About\s+(?:<[^>]+>)?\s*([\d,.\s]+?)\s+results?', + r'([\d,.\s]+?)\s*条结果', + r'>(\d[\d,.\s]*?)\s+results?\b', ] for pattern in patterns: 
             match = re.search(pattern, html, re.IGNORECASE)
             if match:
-                return int(match.group(1).replace(',', ''))
+                raw = re.sub(r'[,.\s]', '', match.group(1).strip())
+                if raw.isdigit():
+                    return int(raw)
 
         return 0
diff --git a/citationclaw/core/structured_author_fetcher.py b/citationclaw/core/structured_author_fetcher.py
new file mode 100644
index 0000000..adf8e9a
--- /dev/null
+++ b/citationclaw/core/structured_author_fetcher.py
@@ -0,0 +1,310 @@
+"""Structured author fetching: WOS → S2 fallback → MinerU affiliation supplement."""
+from __future__ import annotations
+
+import asyncio
+import re
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlencode
+
+import httpx
+
+from citationclaw.core.author_name_utils import format_wos_name, name_keys, names_match, to_natural_name
+from citationclaw.core.s2_client import S2Client
+
+
+_WOS_ENDPOINT = "https://api.clarivate.com/apis/wos-starter/v1/documents"
+
+
+def _normalize_title(text: str) -> str:
+    import unicodedata
+    text = unicodedata.normalize("NFKD", text or "").lower()
+    text = re.sub(r"[^a-z0-9]+", " ", text)
+    return " ".join(text.split())
+
+
+def _wos_hit_authors(hit: dict) -> list[dict[str, Any]]:
+    names = hit.get("names", {}) or {}
+    authors = []
+    for a in names.get("authors", []) or []:
+        raw = (a.get("wosStandard", "") or a.get("displayName", "")).strip()
+        name = to_natural_name(format_wos_name(raw) or raw)
+        if name:
+            authors.append({"name": name, "affiliation": "", "email": "", "source": "wos"})
+    return authors
+
+
+async def _query_wos(
+    api_key: str,
+    title: str,
+    doi: str,
+    *,
+    retries: int = 2,
+    retry_wait: float = 20.0,
+) -> list[dict[str, Any]]:
+    """Query the WOS Starter API; returns an author list (empty on failure or not found)."""
+    if not api_key:
+        return []
+    query = f"DO=({doi})" if doi else f'TI=("{title}")'
+    params = {"db": "WOS", "q": query, "limit": 5, "page": 1}
+    headers = {"X-ApiKey": api_key}
+
+    for attempt in range(retries + 1):
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                resp = await client.get(
+                    f"{_WOS_ENDPOINT}?{urlencode(params)}", headers=headers
+                )
+                resp.raise_for_status()
+                hits = resp.json().get("hits", []) or []
+        except httpx.HTTPStatusError as exc:
+            if exc.response.status_code == 429 and attempt < retries:
+                await asyncio.sleep(retry_wait * (attempt + 1))
+                continue
+            return []
+        except Exception:
+            if attempt < retries:
+                await asyncio.sleep(10.0)
+                continue
+            return []
+
+        if not hits:
+            return []
+        # DOI match preferred; fall back to the first title-equal hit, then the first hit
+        title_norm = _normalize_title(title)
+        best = None
+        for h in hits:
+            ids = h.get("identifiers", {}) or {}
+            if doi and (ids.get("doi") or "").lower() == doi.lower():
+                best = h
+                break
+        if best is None:
+            for h in hits:
+                if _normalize_title(h.get("title", "")) == title_norm:
+                    best = h
+                    break
+        if best is None:
+            best = hits[0]
+        return _wos_hit_authors(best)
+    return []
+
+
+async def _query_s2(s2_client: S2Client, title: str) -> list[dict[str, Any]]:
+    result = await s2_client.search_paper(title)
+    if not result:
+        return []
+    authors = []
+    for a in result.get("authors", []):
+        raw = a.get("name", "")
+        name = to_natural_name(format_wos_name(raw) or raw)
+        if name:
+            authors.append({
+                "name": name,
+                "affiliation": a.get("affiliation", ""),
+                "email": "",
+                "s2_id": a.get("s2_id", ""),
+                "source": "s2",
+            })
+    return authors
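For reference, a sketch of the only fields `_wos_hit_authors` reads from a WOS Starter document record — the sample below is illustrative, not a captured API response:

```python
from citationclaw.core.structured_author_fetcher import _wos_hit_authors

# Illustrative record: `names.authors[].wosStandard` (preferred) or
# `displayName` is what the helper above consumes.
hit = {
    "title": "Deep Residual Learning for Image Recognition",
    "identifiers": {"doi": "10.1109/cvpr.2016.90"},
    "names": {
        "authors": [
            {"wosStandard": "He, KM", "displayName": "He, Kaiming"},
            {"wosStandard": "Zhang, XY", "displayName": "Zhang, Xiangyu"},
        ]
    },
}

print(_wos_hit_authors(hit))
# [{'name': 'KM He', 'affiliation': '', 'email': '', 'source': 'wos'},
#  {'name': 'XY Zhang', 'affiliation': '', 'email': '', 'source': 'wos'}]
```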
+
+
+def _merge_wos_s2(
+    wos_authors: list[dict[str, Any]],
+    s2_authors: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Enrich the WOS author list with S2 affiliation/s2_id via name matching.
+
+    WOS is authoritative for the author list; S2 fills in missing affiliations,
+    and S2 authors that match no WOS author are dropped. Names are matched with
+    names_match(), which handles initials, accents, and different formatting
+    conventions (WOS abbreviated ↔ S2 full names).
+    """
+    if not s2_authors:
+        return wos_authors
+    if not wos_authors:
+        return s2_authors
+
+    merged: list[dict[str, Any]] = []
+
+    for wos_a in wos_authors:
+        enriched = dict(wos_a)
+        for s2_a in s2_authors:
+            if names_match(wos_a.get("name", ""), s2_a.get("name", "")):
+                if s2_a.get("affiliation") and not enriched.get("affiliation"):
+                    enriched["affiliation"] = s2_a["affiliation"]
+                    enriched["affiliation_source"] = "s2"
+                if s2_a.get("s2_id") and not enriched.get("s2_id"):
+                    enriched["s2_id"] = s2_a["s2_id"]
+                if s2_a.get("openalex_id") and not enriched.get("openalex_id"):
+                    enriched["openalex_id"] = s2_a["openalex_id"]
+                break
+        merged.append(enriched)
+
+    return [a for a in merged if a.get("name")]
+
+
+def _merge_with_pdf(
+    api_authors: list[dict[str, Any]],
+    pdf_authors: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Enrich api_authors with affiliations from pdf_authors; append unmatched PDF authors."""
+    if not pdf_authors:
+        return api_authors
+    if not api_authors:
+        return [{"name": to_natural_name(a.get("name", "")), "affiliation": a.get("affiliation", ""),
+                 "email": a.get("email", ""), "source": "pdf"} for a in pdf_authors]
+
+    pdf_by_key: dict[str, dict] = {}
+    for a in pdf_authors:
+        for k in name_keys(a.get("name", "")):
+            pdf_by_key.setdefault(k, a)
+
+    matched_ids: set[int] = set()
+    merged = []
+    for api_a in api_authors:
+        enriched = dict(api_a)
+        match = None
+        for k in name_keys(api_a.get("name", "")):
+            if k in pdf_by_key:
+                match = pdf_by_key[k]
+                matched_ids.add(id(match))
+                break
+        if match:
+            if match.get("affiliation") and not enriched.get("affiliation"):
+                enriched["affiliation"] = match["affiliation"]
+            if match.get("email") and not enriched.get("email"):
+                enriched["email"] = match["email"]
+        merged.append(enriched)
+
+    for pdf_a in pdf_authors:
+        if id(pdf_a) not in matched_ids:
+            merged.append({
+                "name": to_natural_name(pdf_a.get("name", "")),
+                "affiliation": pdf_a.get("affiliation", ""),
+                "email": pdf_a.get("email", ""),
+                "source": "pdf_only",
+            })
+    return [a for a in merged if a.get("name")]
+
+
+class StructuredAuthorFetcher:
+    """Fetch a structured author list: WOS → S2 fallback → MinerU affiliation supplement.
+
+    All returned names are in natural format ("First Last", no comma).
+    """
+
+    def __init__(
+        self,
+        wos_api_key: str = "",
+        s2_api_key: str = "",
+        mineru_api_token: str = "",
+        openai_api_key: str = "",
+        openai_base_url: str = "",
+        model: str = "",
+        pdf_cache_dir: str | Path | None = None,
+        log_callback=None,
+    ):
+        self._wos_key = wos_api_key
+        self._s2_client = S2Client(api_key=s2_api_key)
+        self._mineru_token = mineru_api_token
+        self._openai_key = openai_api_key
+        self._openai_base = openai_base_url
+        self._model = model
+        self._pdf_cache_dir = Path(pdf_cache_dir) if pdf_cache_dir else None
+        self._log = log_callback or (lambda msg: None)
+
+    async def fetch(
+        self,
+        title: str,
+        doi: str = "",
+        pdf_path: str | Path | None = None,
+    ) -> tuple[list[dict[str, Any]], str]:
+        """Return (author_list, source_label).
+
+        source_label: "wos" | "wos+s2" | "wos+pdf" | "s2" | "s2+pdf" | "pdf" | ""
+        (empty = no authors found).
+        """
+        # Step 1: WOS
+        wos_authors: list[dict] = []
+        if self._wos_key:
+            try:
+                wos_authors = await _query_wos(self._wos_key, title, doi)
+            except Exception as exc:
+                self._log(f"[StructuredAuthorFetcher] WOS error: {exc}")
+
+        # Step 2: PDF (MinerU) — try to get affiliations from the PDF
+        pdf_authors: list[dict] = []
+        pdf_path_resolved = Path(pdf_path) if pdf_path else None
+        if pdf_path_resolved and pdf_path_resolved.exists() and self._openai_key:
+            try:
+                pdf_authors = await self._run_mineru(pdf_path_resolved, title, doi)
+            except Exception as exc:
+                self._log(f"[StructuredAuthorFetcher] MinerU error: {exc}")
+
+        if wos_authors:
+            if pdf_authors:
+                # WOS + PDF: enrich WOS names with PDF affiliations
+                merged = _merge_with_pdf(wos_authors, pdf_authors)
+                self._log(
+                    f"[StructuredAuthorFetcher] WOS {len(wos_authors)} + PDF {len(pdf_authors)}"
+                    f" → merged {len(merged)} authors"
+                )
+                return merged, "wos+pdf"
+            else:
+                # WOS succeeded but no PDF — fall back to S2 for affiliations
+                s2_authors: list[dict] = []
+                if self._s2_client:
+                    try:
+                        s2_authors = await _query_s2(self._s2_client, title)
+                    except Exception as exc:
+                        self._log(f"[StructuredAuthorFetcher] S2 error: {exc}")
+                if s2_authors:
+                    merged = _merge_wos_s2(wos_authors, s2_authors)
+                    self._log(
+                        f"[StructuredAuthorFetcher] WOS {len(wos_authors)} + S2 {len(s2_authors)}"
+                        f" → merged {len(merged)} authors"
+                    )
+                    return merged, "wos+s2"
+                else:
+                    return wos_authors, "wos"
+
+        # WOS failed — fall back to S2
+        s2_authors: list[dict] = []
+        if self._s2_client:
+            try:
+                s2_authors = await _query_s2(self._s2_client, title)
+            except Exception as exc:
+                self._log(f"[StructuredAuthorFetcher] S2 error: {exc}")
+
+        if s2_authors:
+            if pdf_authors:
+                merged = _merge_with_pdf(s2_authors, pdf_authors)
+                return merged, "s2+pdf"
+            return s2_authors, "s2"
+
+        if pdf_authors:
+            return [{"name": to_natural_name(a.get("name", "")), "affiliation": a.get("affiliation", ""),
+                     "email": a.get("email", ""), "source": "pdf"} for a in pdf_authors], "pdf"
+
+        return [], ""
+
+    async def _run_mineru(self, pdf_path: Path, title: str, doi: str) -> list[dict]:
+        from citationclaw.core.pdf_mineru_parser import MinerUParser
+        from citationclaw.core.pdf_author_extractor import PDFAuthorExtractor
+
+        cache_dir = self._pdf_cache_dir or (pdf_path.parent / ".pdf_parsed_cache")
+        parser = MinerUParser(output_base=cache_dir, mineru_api_token=self._mineru_token)
+        extractor = PDFAuthorExtractor(
+            api_key=self._openai_key,
+            base_url=self._openai_base,
+            model=self._model,
+        )
+        paper_key = parser.paper_key({"doi": doi, "title": title or pdf_path.stem})
+        parsed = await parser.parse_async(pdf_path, paper_key)
+        if not parsed:
+            return []
+        blocks = parsed.get("content_list") or parsed.get("first_page_blocks", [])
+        return await extractor.extract(blocks)
diff --git a/citationclaw/skills/phase2_author_intel.py b/citationclaw/skills/phase2_author_intel.py
index d1dfdcf..7bc41df 100644
--- a/citationclaw/skills/phase2_author_intel.py
+++ b/citationclaw/skills/phase2_author_intel.py
@@ -45,6 +45,9 @@ async def _run_inner(self, ctx: SkillContext, **kwargs) -> SkillResult:
             target_paper_authors=target_paper_authors,
             author_cache=author_cache,
             cancel_event=quota_event,
+            wos_api_key=getattr(config, 'wos_api_key', ''),
+            s2_api_key=getattr(config, 's2_api_key', ''),
+            mineru_api_token=getattr(config, 'mineru_api_token', ''),
         )
 
         await searcher.search(
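End-to-end, the new fetcher can be exercised standalone — a minimal sketch (the keys, title, and DOI are placeholders; `log_callback=print` just surfaces the merge logs):

```python
import asyncio

from citationclaw.core.structured_author_fetcher import StructuredAuthorFetcher


async def main() -> None:
    fetcher = StructuredAuthorFetcher(
        wos_api_key="YOUR-WOS-STARTER-KEY",  # placeholder
        s2_api_key="YOUR-S2-KEY",            # optional; enriches affiliations
        log_callback=print,
    )
    authors, source = await fetcher.fetch(
        "Deep Residual Learning for Image Recognition",
        doi="10.1109/cvpr.2016.90",
    )
    print(source)  # "wos", "wos+s2", "s2", ... per the docstring above
    for a in authors:
        print(f'{a["name"]} | {a.get("affiliation", "")}')


asyncio.run(main())
```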
diff --git a/citationclaw/static/js/main.js b/citationclaw/static/js/main.js
index 3aed5ba..9a6f9ab 100644
--- a/citationclaw/static/js/main.js
+++ b/citationclaw/static/js/main.js
@@ -663,6 +663,7 @@ function initIndexPage() {
     if (el('idx-service-tier')) el('idx-service-tier').value = cfg.service_tier || 'basic';
     if (el('idx-dashboard-model')) el('idx-dashboard-model').value = cfg.dashboard_model || 'gemini-3-flash-preview-nothinking';
     if (el('idx-s2-api-key')) el('idx-s2-api-key').value = cfg.s2_api_key || '';
+    if (el('idx-wos-api-key')) el('idx-wos-api-key').value = cfg.wos_api_key || '';
     if (el('idx-mineru-token')) el('idx-mineru-token').value = cfg.mineru_api_token || '';
     if (el('idx-api-access-token')) el('idx-api-access-token').value = cfg.api_access_token || '';
     if (el('idx-api-user-id')) el('idx-api-user-id').value = cfg.api_user_id || '';
@@ -709,6 +710,7 @@ function initIndexPage() {
      }[el('idx-service-tier')?.value || 'basic']),
      dashboard_model: el('idx-dashboard-model')?.value || '',
      s2_api_key: el('idx-s2-api-key')?.value || '',
+     wos_api_key: el('idx-wos-api-key')?.value || '',
      mineru_api_token: el('idx-mineru-token')?.value || '',
      api_access_token: el('idx-api-access-token')?.value || '',
      api_user_id: el('idx-api-user-id')?.value || '',
@@ -721,6 +723,7 @@ function initIndexPage() {
     if (!body.api_access_token && existing.api_access_token) delete body.api_access_token;
     if (!body.api_user_id && existing.api_user_id) delete body.api_user_id;
     if (!body.s2_api_key && existing.s2_api_key) delete body.s2_api_key;
+    if (!body.wos_api_key && existing.wos_api_key) delete body.wos_api_key;
     if (!body.mineru_api_token && existing.mineru_api_token) delete body.mineru_api_token;
     const merged = Object.assign({}, existing, body);
     const resp = await safeFetch('/api/config', {
diff --git a/citationclaw/templates/index.html b/citationclaw/templates/index.html
index 55ff28c..a1ddd18 100644
--- a/citationclaw/templates/index.html
+++ b/citationclaw/templates/index.html
@@ -244,6 +244,10 @@

 论文被引画像分析
 
+        <div class="form-group">
+          <label>Web of Science Starter API Key(可选,用于结构化作者提取)</label>
+          <input type="password" id="idx-wos-api-key" placeholder="WOS Starter API Key">
+        </div>
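Finally, a key entered in the new field can be sanity-checked outside the app — this sketch reuses the exact endpoint, header, and `TI=()` query syntax from `_query_wos`; reading the key from a `WOS_API_KEY` environment variable is an assumption of the example:

```python
import asyncio
import os

import httpx

WOS_ENDPOINT = "https://api.clarivate.com/apis/wos-starter/v1/documents"


async def check_wos_key() -> None:
    params = {
        "db": "WOS",
        "q": 'TI=("Deep Residual Learning for Image Recognition")',  # illustrative title
        "limit": 5,
        "page": 1,
    }
    headers = {"X-ApiKey": os.environ["WOS_API_KEY"]}
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.get(WOS_ENDPOINT, params=params, headers=headers)
        resp.raise_for_status()  # 401/403 → bad key; 429 → rate limited
        print(f'{len(resp.json().get("hits", []))} hit(s) returned — key works')


asyncio.run(check_wos_key())
```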