From ba775a833d39d81b36e5436cbbda33de796d2445 Mon Sep 17 00:00:00 2001
From: jiaojiaodubai <63148861+jiaojiaodubai@users.noreply.github.com>
Date: Tue, 26 Nov 2024 14:36:51 +0800
Subject: [PATCH] fix #412
---
CCPINFO.js | 122 +++++------
CNKI Gongjushu.js | 173 ++++++++++++---
CNKI TIKS.js | 185 +++-------------
Douban.js | 13 +-
Duxiu.js | 19 +-
Modern History.js | 540 ++++++++++++++++++++++++++++------------------
PatentStar.js | 31 ++-
7 files changed, 584 insertions(+), 499 deletions(-)
diff --git a/CCPINFO.js b/CCPINFO.js
index c7c4ef07..d53d500c 100644
--- a/CCPINFO.js
+++ b/CCPINFO.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 12,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-03-08 09:49:44"
+ "lastUpdated": "2024-11-11 05:42:47"
}
/*
@@ -46,8 +46,8 @@ function filterQuery(items) {
if (typeof items == 'string' || !items.length) items = [items];
// filter out invalid queries
- var isbns = [], isbn;
- for (var i = 0, n = items.length; i < n; i++) {
+ let isbns = [], isbn;
+ for (let i = 0, n = items.length; i < n; i++) {
if (items[i].ISBN && (isbn = ZU.cleanISBN(items[i].ISBN))) {
isbns.push(isbn);
}
@@ -62,14 +62,14 @@ function filterQuery(items) {
async function doSearch(items) {
Z.debug('get items:');
Z.debug(items);
- for (let isbn of filterQuery(items)) {
+ for (const isbn of filterQuery(items)) {
await processISBN(isbn);
}
}
async function processISBN(isbn) {
Z.debug(`prosessing ISBN: ${isbn}`);
- let search = await requestText(
+ const search = await requestText(
'https://book.cppinfo.cn/So/Search/Index',
{
method: 'POST',
@@ -81,14 +81,14 @@ async function processISBN(isbn) {
}
}
);
- let href = 'https://book.cppinfo.cn' + tryMatch(search, /"p-text"> span > div > div.p-text > a');
- for (let row of rows) {
- // Z.debug(row);
- let href = row.href;
- // Z.debug(href);
- let title = ZU.trimInternal(row.title);
- // Z.debug(title);
+ const items = {};
+ let found = false;
+ const rows = doc.querySelectorAll('#tbinfo > span > div > div.p-text > a');
+ for (const row of rows) {
+ const href = row.href;
+ const title = ZU.trimInternal(row.title);
if (!href || !title) continue;
if (checkOnly) return true;
found = true;
@@ -119,7 +116,7 @@ async function doWeb(doc, url) {
if (detectWeb(doc, url) == 'multiple') {
let items = await Zotero.selectItems(getSearchResults(doc, false));
if (!items) return;
- for (let url of Object.keys(items)) {
+ for (const url of Object.keys(items)) {
await scrape(await requestDocument(url));
}
}
@@ -129,10 +126,9 @@ async function doWeb(doc, url) {
}
async function scrape(doc, url = doc.location.href) {
- // Z.debug(doc);
- var newItem = new Zotero.Item('book');
+ const newItem = new Zotero.Item('book');
newItem.extra = '';
- let labels = new Labels(doc, '.book_intro .fl.clearfix, .book_intro .fr.clearfix');
+ const labels = new Labels(doc, '.book_intro .fl.clearfix, .book_intro .fr.clearfix');
Z.debug(labels.data.map(arr => [arr[0], ZU.trimInternal(arr[1].innerText)]));
newItem.title = text(doc, '.book_intro > h2 > span');
newItem.abstractNote = text(doc, 'div.field_1 > p');
@@ -141,15 +137,14 @@ async function scrape(doc, url = doc.location.href) {
newItem.ISBN = labels.get('ISBN');
if (newItem.ISBN) {
try {
- let searchLang = await requestText(
+ const searchLang = await requestText(
'https://book.cppinfo.cn/So/Search/LangSearch',
{
method: 'POST',
body: `key=${newItem.ISBN}&offset=1&hasEbook=false`
}
);
- // Z.debug(searchLang);
- let langFlag = searchLang.split('\r\n')[1].match(/([a-z]+)(?=(&))/)[0];
+ const langFlag = searchLang.split('\r\n')[1].match(/([a-z]+)(?=(&))/)[0];
Z.debug(langFlag);
newItem.language = {
chi: 'zh-CN',
@@ -164,45 +159,35 @@ async function scrape(doc, url = doc.location.href) {
}
}
newItem.url = url;
+ newItem.libraryCatalog = '国家出版发行信息公共服务平台';
newItem.extra += addExtra('CLC', labels.get('中图分类'));
newItem.extra += addExtra('price', labels.get('定价'));
- let authors = labels.get('著者').replace(/[等主编原著]*$/, '').split(/[;;,,]/g)
- .filter(string => string != '暂无')
- .map((creator) => {
- // 方括号:https://book.cppinfo.cn/Encyclopedias/home/index?id=4483977
- // 西文括号:https://book.cppinfo.cn/Encyclopedias/home/index?id=4616652
- // 中文括号:https://book.cppinfo.cn/Encyclopedias/home/index?id=4557869
- let country = tryMatch(creator, /^[[((](.+?)[\]))]/, 1);
- creator = creator.replace(/^[[((].+?[\]))]/, '');
- creator = ZU.cleanAuthor(creator, 'author');
- creator.country = country;
- return creator;
- });
- let translators = labels.get('译者').replace(/[翻译]*$/, '').split(/[;;,,]/g)
- .filter(string => string != '暂无')
- .map((translator) => {
- return ZU.cleanAuthor(translator, 'translator');
- });
- // https://book.cppinfo.cn/Encyclopedias/home/index?id=4286780
- let contributors = labels.get('编辑').replace(/[编校注]*$/, '').split(/[;;,,]/g)
- .filter(string => string != '暂无')
- .map((translator) => {
- return ZU.cleanAuthor(translator, 'contributor');
- });
- let creators = [...authors, ...translators, ...contributors];
- if (creators.some(creator => creator.country)) {
- newItem.extra += addExtra('creatorsExt', JSON.stringify(creators));
+	/**
+	 * Read one creator label from the detail page, split it into names and
+	 * register every name on `newItem` with the given creator type.
+	 * @param {string} label label passed to `labels.get()`
+	 * @param {RegExp} rolePattern trailing role words to strip from the label value
+	 * @param {string} creatorType creator type handed to `ZU.cleanAuthor`
+	 * @returns {Object[]} the same creators, each additionally carrying a `country` property
+	 */
+	function splitCreators(label, rolePattern, creatorType) {
+		const names = labels.get(label).replace(rolePattern, '').split(';')
+			.filter(string => string !== '暂无');
+		const extended = [];
+		for (const name of names) {
+			// Square brackets: https://book.cppinfo.cn/Encyclopedias/home/index?id=4483977
+			// Western parentheses: https://book.cppinfo.cn/Encyclopedias/home/index?id=4616652
+			// Chinese parentheses: https://book.cppinfo.cn/Encyclopedias/home/index?id=4557869
+			const country = tryMatch(name, /^[[((](.+?)[\]))]/, 1);
+			const bareName = name.replace(/^[[((].+?[\]))]/, '');
+			const creator = ZU.cleanAuthor(bareName, creatorType);
+			const hasHanChars = /[\u4e00-\u9fff]/.test(creator.lastName);
+			if (hasHanChars) {
+				creator.fieldMode = 1;
+			}
+			// The item receives a copy taken before `country` is attached,
+			// so the extra property only lives on the returned objects.
+			newItem.creators.push(JSON.parse(JSON.stringify(creator)));
+			creator.country = country;
+			extended.push(creator);
+		}
+		return extended;
+	}
- creators.forEach((creator) => {
- if (/[\u4e00-\u9fff]/.test(creator.lastName)) {
- creator.fieldMode = 1;
- }
- delete creator.country;
- newItem.creators.push(creator);
- });
- Z.debug(creators);
- let tags = doc.querySelectorAll('div.book_label > div.label_in > span');
- tags.forEach(tag => newItem.tags.push(tag.innerText));
+ const creatorsExt = [
+ ...splitCreators('著者', /[等主编原著]*$/, 'author'),
+ ...splitCreators('编辑', /[等主总]*编$/, 'editor'),
+ ...splitCreators('译者', /[等翻译]*$/, 'translator')
+ ];
+ if (creatorsExt.some(creator => creator.country)) {
+ newItem.extra += addExtra('creatorsExt', JSON.stringify(creatorsExt));
+ }
+ doc.querySelectorAll('div.book_label > div.label_in > span').forEach(elm => newItem.tags.push(elm.innerText));
newItem.complete();
}
@@ -268,14 +253,6 @@ function addExtra(key, value) {
: '';
}
-/**
- * Attempts to get the part of the pattern described from the character,
- * and returns an empty string if not match.
- * @param {String} string
- * @param {RegExp} pattern
- * @param {Number} index
- * @returns
- */
function tryMatch(string, pattern, index = 0) {
if (!string) return '';
let match = string.match(pattern);
@@ -304,9 +281,8 @@ var testCases = [
"date": "2022-07",
"ISBN": "9787562866350",
"abstractNote": "《朗文新编英语语法》由我社从培生教育出版公司引进,可供中学生、大学生以及英语学习者自学使用。本书由诊断测试、语法讲解和练习答案三部分组成。学习者可利用图书前面的诊断测试,在单元学习前,了解自己的薄弱项,从而进行有针对性的学习。语法知识点讲解部分共分为36个单元,包含一般语法书中的所有内容,语法解释详细,内容条理清晰。每个单元还配套丰富的练习题,学习者可通过练习巩固所学的语法点。本书所有例句均选自语料库,表达地道,是中高水平学习者巩固语法的不二之选。",
- "extra": "CLC: H314\nprice: ¥ 80.00\ncreatorsExt: [{\"firstName\":\"\",\"lastName\":\"马克·福利\",\"creatorType\":\"author\",\"country\":\"英\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"translator\"}]",
- "language": "zh-CN",
- "libraryCatalog": "CCPINFO",
+ "extra": "CLC: H314\nprice: ¥ 80.00\ncreatorsExt: [{\"firstName\":\"\",\"lastName\":\"马克·福利\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"英\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"editor\",\"country\":\"\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"translator\",\"country\":\"\"}]",
+ "libraryCatalog": "国家出版发行信息公共服务平台",
"publisher": "郑州大学出版社有限公司",
"url": "https://book.cppinfo.cn/Encyclopedias/home/index?id=4417147",
"attachments": [],
@@ -366,8 +342,7 @@ var testCases = [
"ISBN": "9787214275707",
"abstractNote": "本书全面、系统地梳理了330-610年拜占庭王朝历史以及帝国的崛起。上编为皇帝列传,介绍了君士坦丁王朝、瓦伦提尼安诸帝、塞奥多西王朝、利奥王朝、查士丁尼王朝历史。下编为拜占庭帝国的崛起,从世界地理观、宗教、种族与身份认同、自然灾害等方面加以论述,聚焦拜占庭帝国历史的第一黄金时代,对查士丁尼时代的司法改革、宗教思想和政策、制度、经济生活等方面进行系统论述,展现拜占庭帝国的崛起图景。本书是具有唯物史观特色的、有可靠依据的独立意见和系列研究成果,形成了我国学者对拜占庭历史发展和文化演化的话语体系。",
"extra": "CLC: K134\nprice: ¥ 248.00",
- "language": "zh-CN",
- "libraryCatalog": "CCPINFO",
+ "libraryCatalog": "国家出版发行信息公共服务平台",
"publisher": "江苏人民出版社",
"url": "https://book.cppinfo.cn/Encyclopedias/home/index?id=4567148",
"attachments": [],
@@ -419,7 +394,7 @@ var testCases = [
"abstractNote": "Since the 18th National Congress of the Communist Party of China in 2012, the drive to develop socialism with Chinese characteristics has entered a new era. The past decade has seen the country forging ahead and embracing historic changes, with remarkable achievements made by the Party and the state.\n So how did China make these achievements? What are the secrets to the CPC's governance of China? What is the key to the success of China's governance in the new era?\n This book covers such important topics as \"how to break the cycle of rise and fall\", \"whole-process people's democracy\", \"self-reform and social revolution\", \"cyber governance\", and \"Chinese-style modernization\", reveals the secrets to the success of China's governance in the new era, and shares China's wisdom and solutions with the world.",
"extra": "CLC: D616\nprice: ¥ 118.00",
"language": "en-US",
- "libraryCatalog": "CCPINFO",
+ "libraryCatalog": "国家出版发行信息公共服务平台",
"publisher": "外文出版社",
"url": "https://book.cppinfo.cn/Encyclopedias/home/index?id=4479822",
"attachments": [],
@@ -471,5 +446,4 @@ var testCases = [
]
}
]
-
/** END TEST CASES **/
diff --git a/CNKI Gongjushu.js b/CNKI Gongjushu.js
index 9b273519..50c4d411 100644
--- a/CNKI Gongjushu.js
+++ b/CNKI Gongjushu.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-03-29 10:58:26"
+ "lastUpdated": "2024-11-13 09:14:00"
}
/*
@@ -37,41 +37,145 @@
function detectWeb(_doc, url) {
- return url.includes('/detail?')
- ? 'encyclopediaArticle'
- : false;
+ if (url.includes('/detail?')) {
+ return 'encyclopediaArticle';
+ }
+ else if (url.includes('/bookdetail?')) {
+ return 'book';
+ }
+ return false;
}
async function doWeb(doc, url) {
- let newItem = new Z.Item('encyclopediaArticle');
- let extra = new Extra();
- newItem.title = text(doc, '.detailDesc > h3 > .navi-search');
- extra.set('original-title', text(doc, '.detailDesc > h3 > :last-child'), true);
- newItem.abstractNote = ZU.trimInternal(text(doc, '.detailDesc .image_box'));
- let pubInfo = text(doc, '.descBox > .descDiv:last-child').substring(6).split('.');
- Z.debug(pubInfo);
- try {
- newItem.encyclopediaTitle = pubInfo[1];
- newItem.place = tryMatch(pubInfo[2], /^(.+):/, 1);
- newItem.publisher = tryMatch(pubInfo[2], /:(.+)$/, 1);
- newItem.date = pubInfo[3];
- newItem.pages = tryMatch(pubInfo[4], /[\d,+-]+/);
+ const extra = new Extra();
+ const newItem = new Z.Item(detectWeb(doc, url));
+ const creatorsExt = [];
+	/**
+	 * Register a group of names that share one responsibility statement.
+	 * The creator type is derived from the last character of `role`:
+	 * "编" gives `editor`, "译" gives `translator`, anything else `author`.
+	 * Each name may start with a "[country]" prefix and end with a
+	 * parenthesised original-language name; both are stripped before the
+	 * name is parsed.
+	 * @param {string[]} names raw names of one group
+	 * @param {string} [role] responsibility statement of the group
+	 */
+	function processNames(names, role = '') {
+		let creatorType = 'author';
+		if (role.endsWith('编')) {
+			creatorType = 'editor';
+		}
+		else if (role.endsWith('译')) {
+			creatorType = 'translator';
+		}
+		names.forEach((name) => {
+			const country = tryMatch(name, /^\[(.+?)\]/, 1);
+			const original = tryMatch(name, /[((](.+?)[))]$/, 1).replace(',', ' ');
+			if (original) {
+				const originalCreator = cleanAuthor(ZU.capitalizeName(original), creatorType);
+				// Written as "first || last", or just "last" when there is no first name.
+				extra.set('original-author', `${originalCreator.firstName ? originalCreator.firstName + ' || ' : ''}${originalCreator.lastName}`, true);
+			}
+			const creator = cleanAuthor(name.replace(/^\[.+?\]/, '').replace(/[((].+?[))]$/, ''), creatorType);
+			// The item gets a copy made before the extension fields are attached.
+			newItem.creators.push(JSON.parse(JSON.stringify(creator)));
+			// The extension fields belong to the creator, not to the item:
+			// the later `creatorsExt.some(creator => creator.country || creator.original)`
+			// check reads them from the objects collected here.
+			creator.country = country;
+			creator.original = original;
+			creatorsExt.push(creator);
+		});
+	}
- catch (error) {
- Z.debug(error);
+	// Fill the type-specific fields; fields shared by both types are set after the switch.
+	switch (newItem.itemType) {
+		case 'encyclopediaArticle': {
+			newItem.title = text(doc, '.detailDesc > h3 > .navi-search');
+			extra.set('original-title', text(doc, '.detailDesc > h3 > :last-child'), true);
+			newItem.abstractNote = ZU.trimInternal(text(doc, '.detailDesc .image_box'));
+			const bookTitle = text(doc, '.el-tooltip', 1);
+			// The title is used as a literal anchor inside regular expressions below,
+			// so regex metacharacters in it (parentheses, brackets, "+", "?", ...) must be escaped.
+			const bookTitlePattern = bookTitle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+			const pubInfoText = text(doc, '.descBox > .descDiv:last-child').substring(6);
+			// Everything after "<book title>." is split on "." into place:publisher, date and pages.
+			const pubInfoList = tryMatch(pubInfoText, new RegExp(`${bookTitlePattern}\\.(.+)`), 1).split('.');
+			newItem.encyclopediaTitle = bookTitle;
+			newItem.place = tryMatch(pubInfoList[0], /^(.+):/, 1);
+			newItem.publisher = tryMatch(pubInfoList[0], /:(.+)$/, 1);
+			newItem.date = pubInfoList[1];
+			newItem.pages = tryMatch(pubInfoList[2], /第([\d,+-]+)页/, 1);
+			// Everything before ".<book title>" holds the creators: groups are separated
+			// by ";", names inside a group by ",", and the role is a suffix of the group.
+			tryMatch(pubInfoText, new RegExp(`^(.+)\\.${bookTitlePattern}`), 1).split(';').forEach((group) => {
+				processNames(group.replace(/等?副?[总主参]?[编著]?$|翻?译$/g, '').split(','), tryMatch(group, /副?[总主参]?[编著]?$|翻?译$/));
+			});
+			break;
+		}
+		case 'book': {
+			const more = doc.querySelector('.leftBox > .moreBtn');
+			if (more) {
+				// NOTE(review): presumably expands the collapsed metadata section; the fixed
+				// delay gives the page time to render it before the labels are read.
+				more.click();
+				await new Promise(resolve => setTimeout(resolve, 2000));
+			}
+			const labels = new Labels(doc, '.left_content > span,.moreMsg > span');
+			newItem.title = text(doc, '.rightTop > .left');
+			// "并列正书名" is the parallel title, so it is stored as a title, not as an author.
+			extra.set('original-title', ZU.capitalizeTitle(labels.get('并列正书名')));
+			newItem.abstractNote = labels.get('书目简介');
+			newItem.series = labels.get('分辑名');
+			newItem.place = labels.get('出版地');
+			newItem.publisher = labels.get('出版者');
+			newItem.date = labels.get('出版时间');
+			newItem.numPages = tryMatch(labels.get('页码'), /\d+$/);
+			// Creator groups and their roles are parallel ";"-separated lists.
+			const roles = labels.get('责任方式').split(';');
+			labels.get('主要责任者').split(';').forEach((group, index) => {
+				processNames(group.replace(/等$/, '').split(','), roles[index]);
+			});
+			if (creatorsExt.some(creator => creator.country || creator.original)) {
+				extra.set('creatorsExt', JSON.stringify(creatorsExt));
+			}
+			break;
+		}
+	}
+ newItem.language = 'zh-CN';
newItem.url = url;
- pubInfo[0].split(/[,;,;、]/).forEach((creator) => {
- creator = ZU.cleanAuthor(creator.replace(/等?副?[总主参]?[编著]?$/, ''), 'author');
- if (/[\u4e00-\u9fff]/.test(creator.lastName)) {
- creator.fieldMode = 1;
- }
- newItem.creators.push(creator);
- });
newItem.extra = extra.toString();
newItem.complete();
}
+/**
+ * Collects "label / value" pairs from the elements matched by a selector
+ * and lets callers look a value up by a label pattern.
+ */
+class Labels {
+	/**
+	 * @param {Document} doc document to query; also used to create the empty fallback element
+	 * @param {string} selector CSS selector matching the label/value containers
+	 */
+	constructor(doc, selector) {
+		this.data = [];
+		this.emptyElm = doc.createElement('div');
+		const blank = /^\s*$/;
+		for (const node of doc.querySelectorAll(selector)) {
+			// Skip containers that wrap another match, and containers without any text.
+			if (node.querySelector(selector) || blank.test(node.textContent)) continue;
+			const clone = node.cloneNode(true);
+			// Drop leading children that hold only whitespace.
+			while (blank.test(clone.firstChild.textContent)) {
+				clone.removeChild(clone.firstChild);
+			}
+			let key;
+			if (clone.childNodes.length > 1) {
+				// The first child is the label; whatever remains is the value.
+				key = clone.removeChild(clone.firstChild).textContent.replace(/\s/g, '');
+			}
+			else {
+				// Label and value share one node: split the text at the label delimiter.
+				const flat = ZU.trimInternal(clone.textContent);
+				key = tryMatch(flat, /^[[【]?.+?[】\]::]/).replace(/\s/g, '');
+				clone.textContent = tryMatch(flat, /^[[【]?.+?[】\]::]\s*(.+)/, 1);
+			}
+			this.data.push([key, clone]);
+		}
+	}
+
+	/**
+	 * Look up the value stored for a label.
+	 * @param {string|string[]} label pattern source (matched case-insensitively against
+	 * the stored keys), or a list of them tried in order until one yields a non-empty value
+	 * @param {boolean} [element] return the value element instead of its trimmed text
+	 * @returns {string|Element} the value, or `''` / an empty element when nothing matches
+	 */
+	get(label, element = false) {
+		const fallback = element ? this.emptyElm : '';
+		if (Array.isArray(label)) {
+			const candidates = label.map(one => this.get(one, element));
+			const hit = element
+				? candidates.find(elm => !/^\s*$/.test(elm.textContent))
+				: candidates.find(str => str);
+			return hit || fallback;
+		}
+		const matcher = new RegExp(label, 'i');
+		const entry = this.data.find(pair => matcher.test(pair[0]));
+		if (!entry) {
+			return fallback;
+		}
+		return element ? entry[1] : ZU.trimInternal(entry[1].textContent);
+	}
+}
+
class Extra {
constructor() {
this.fields = [];
@@ -108,14 +212,6 @@ class Extra {
}
}
-/**
- * Attempts to get the part of the pattern described from the character,
- * and returns an empty string if not match.
- * @param {String} string
- * @param {RegExp} pattern
- * @param {Number} index
- * @returns
- */
function tryMatch(string, pattern, index = 0) {
if (!string) return '';
let match = string.match(pattern);
@@ -124,6 +220,15 @@ function tryMatch(string, pattern, index = 0) {
: '';
}
+/**
+ * Wrapper around `ZU.cleanAuthor` that normalises the spacing after dots
+ * in the last name and sets `fieldMode = 1` when the last name contains
+ * CJK ideographs.
+ * @param {string} name raw name
+ * @param {string} [creatorType] creator type handed to `ZU.cleanAuthor`
+ * @returns {Object} the creator object
+ */
+function cleanAuthor(name, creatorType = 'author') {
+	const parsed = ZU.cleanAuthor(name, creatorType);
+	// "A.B." and "A.  B." both become "A. B. "
+	parsed.lastName = parsed.lastName.replace(/\.\s*/g, '. ');
+	const hasHanChars = /[\u4e00-\u9fff]/.test(parsed.lastName);
+	if (hasHanChars) {
+		parsed.fieldMode = 1;
+	}
+	return parsed;
+}
+
/** BEGIN TEST CASES **/
var testCases = [
{
diff --git a/CNKI TIKS.js b/CNKI TIKS.js
index fe4acb05..1a0345c9 100644
--- a/CNKI TIKS.js
+++ b/CNKI TIKS.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-03-09 09:29:04"
+ "lastUpdated": "2024-11-14 03:02:44"
}
/*
@@ -35,7 +35,7 @@
***** END LICENSE BLOCK *****
*/
-var typeMAp = {
+const typeMAp = {
journalArticle: '刊名',
patent: '专利',
standard: '标准号',
@@ -48,9 +48,9 @@ var typeMAp = {
};
function detectWeb(doc, url) {
- let result = doc.querySelector('.search-result, .main-view');
- if (result) {
- Z.monitorDOMChanges(result, { subtree: true, childList: true });
+ const results = doc.querySelector('.search-result, .main-view');
+ if (results) {
+ Z.monitorDOMChanges(results, { subtree: true, childList: true });
}
if (/\/article?.*id=/i.test(url)) {
return Object.keys(typeMAp).find(key => ZU.xpath(doc, `//span[@class="label"][contains(text(), "${typeMAp[key]}")]`).length);
@@ -62,12 +62,12 @@ function detectWeb(doc, url) {
}
function getSearchResults(doc, checkOnly) {
- var items = {};
- var found = false;
- var rows = doc.querySelectorAll('[data-col-key="TI"] > span > a');
- for (let row of rows) {
- let href = row.href;
- let title = ZU.trimInternal(row.textContent);
+ const items = {};
+ let found = false;
+ const rows = doc.querySelectorAll('[data-col-key="TI"] > span > a');
+ for (const row of rows) {
+ const href = row.href;
+ const title = ZU.trimInternal(row.textContent);
if (!href || !title) continue;
if (checkOnly) return true;
found = true;
@@ -78,9 +78,9 @@ function getSearchResults(doc, checkOnly) {
async function doWeb(doc, url) {
if (detectWeb(doc, url) == 'multiple') {
- let items = await Zotero.selectItems(getSearchResults(doc, false));
+ const items = await Zotero.selectItems(getSearchResults(doc, false));
if (!items) return;
- for (let url of Object.keys(items)) {
+ for (const url of Object.keys(items)) {
await scrape(await requestDocument(url), url);
}
}
@@ -90,122 +90,9 @@ async function doWeb(doc, url) {
}
async function scrape(doc, url = doc.location.href) {
- var newItem = new Z.Item(detectWeb(doc, url));
- let extra = new Extra();
+ let newItem = new Z.Item(detectWeb(doc, url));
+ const extra = new Extra();
newItem.title = text(doc, '.article-content > h3');
- Z.debug(tryMatch(url, /id=([^]+)/i, 1));
- // let info = {
- // "code": 200,
- // "data": {
- // "title": "河南省人民政府关于印发河南省基本公共服务体系“十二五”规划的通知",
- // "co": "",
- // "fileName": "la201302250090",
- // "authors": [],
- // "aff": [
- // {
- // "no": "0",
- // "value": "河南省人民政府",
- // "url": "",
- // "id": "",
- // "afc": "",
- // "favFlag": false
- // }
- // ],
- // "metadata": [
- // {
- // "key": "PBT",
- // "name": "发布日期",
- // "value": "2012-12-28",
- // "sort": 20,
- // "list": [],
- // "itemType": ""
- // },
- // {
- // "key": "OT",
- // "name": "实施日期",
- // "value": "2012-12-28",
- // "sort": 30,
- // "list": [],
- // "itemType": ""
- // },
- // {
- // "key": "FWZH",
- // "name": "发文字号",
- // "value": "豫政[2012]110号",
- // "sort": 40,
- // "list": [],
- // "itemType": ""
- // },
- // {
- // "key": "KY",
- // "name": "关键词",
- // "value": "公共服务",
- // "sort": 50,
- // "list": [
- // {
- // "type": null,
- // "no": null,
- // "value": "公共服务",
- // "url": " https://kc.cnki.net/detail/keyword?sm=Po7fM/kIWbEg5t1I31Z5tbcvxfPwvuN0R8gT8121tlaBNqL6OBSlqE01+Uq2Su5j5bCWiJKrRVnsniW+22hq4DKxqI74YY0BPLiSI4ACXCQo/GRe+WbvcPI8NlPRNFgC",
- // "aff": null,
- // "id": null,
- // "auc": null,
- // "favFlag": null,
- // "afc": null
- // }
- // ],
- // "itemType": "KEYWORD"
- // },
- // {
- // "key": "ZWKZ",
- // "name": "正文快照",
- // "value": "各省辖市、省直管试点县(市)人民政府,省人民政府各部门:\r\n 现将《河南省基本公共服务体系“十二五”规划》印发给你们,请认真组织实施。\r\n $R河南省人民政府\r\n 2012年12月28日$E\r\n 河南省基本公共服务体系“十二五”规划\r\n 序 言\r\n 基本公共服务,是指建立在一定社",
- // "sort": 60,
- // "list": [],
- // "itemType": ""
- // },
- // {
- // "key": "XLJB",
- // "name": "效力级别",
- // "value": "地方政府规章及文件",
- // "sort": 70,
- // "list": [],
- // "itemType": ""
- // },
- // {
- // "key": "SXX",
- // "name": "时效性",
- // "value": "已失效",
- // "sort": 80,
- // "list": [],
- // "itemType": ""
- // }
- // ],
- // "relations": [
- // {
- // "scope": "CAJ",
- // "url": "https://bar.cnki.net/bar/download/order?id=MC9UQPVl75WMiKxhvkw8DebjibTF1dwpzle6a8yEERreTzGpI5hC4n7cfxL%2FJ%2FSzeFJo%2BBtqMrelSyLIC0%2FI9gbmPY%2FyrVt4Xw1fMu3SqHpeFzmqFVUm9%2BBm6kiswzFo5H%2BL6Vp5BLneGY1hWS%2FJzjaAlNyXOUwlX4ITLKxQ7WmXFcmfT0IY8xNoyrMJ9frv%2BM7Qpio9P1dY87Zwtj6pW275c9isC0D7e1vHcsvJcd13Ez128zK8Y1WH9O9wYfeUqABKhnQUYWMQZGc8uXswgm87esHVP7Hf%2BkA%2BFJMtM8iTuyXne8tHwR8K%2FutpjJjU&source=EKRP"
- // },
- // {
- // "scope": "PDF",
- // "url": "https://bar.cnki.net/bar/download/order?id=MC9UQPVl75WMiKxhvkw8DebjibTF1dwpzle6a8yEERreTzGpI5hC4n7cfxL%2FJ%2FSzeFJo%2BBtqMrelSyLIC0%2FI9gbmPY%2FyrVt4Xw1fMu3SqHpeFzmqFVUm9%2BBm6kiswzFo5H%2BL6Vp5BLneGY1hWS%2FJzgcZfCOqkOuWSRg0GtlNeR%2BXFcmfT0IY8xNoyrMJ9frv%2BM7Qpio9P1dY87Zwtj6pW275c9isC0D7e1vHcsvJcd3eQJYladL2kkodK1iE4DfbqABKhnQUYWMQZGc8uXswgm87esHVP7Hf%2BkA%2BFJMtM8iTuyXne8tHwR8K%2FutpjJjU&source=EKRP"
- // }
- // ],
- // "favFlag": false,
- // "repository": {
- // "title": null,
- // "type": "LAWS",
- // "dataset": "EKR_CLKL0817",
- // "resource": "EKRCLKL",
- // "ccl1": null,
- // "ccl2": null
- // },
- // "citationFormat": null,
- // "wwResources": [],
- // "patLawsStaList": []
- // },
- // "msg": "操作成功"
- // }.data;
let info = await requestJSON('https://kc.cnki.net/api/articleabstract/info', {
method: 'POST',
headers: {
@@ -220,7 +107,7 @@ async function scrape(doc, url = doc.location.href) {
Z.debug(info);
info = info.data;
function metadata(mainKey, list = false) {
- let result = info.metadata.find(obj => obj.key == mainKey);
+ const result = info.metadata.find(obj => obj.key == mainKey);
return result
? list
? result.list
@@ -231,15 +118,14 @@ async function scrape(doc, url = doc.location.href) {
}
try {
// throw new Error('debug');
- let doi = attr(doc, 'span.doi > a', 'href') || metadata('DOI');
+ const doi = attr(doc, 'span.doi > a', 'href') || metadata('DOI');
Z.debug(`DOI: ${doi}`);
newItem = await scrapeSearch(doi);
- extra.set('titleTranslation', metadata('MTTI'));
}
catch (error) {
Z.debug(`failed to use search translator.`);
Z.debug(error);
- let citation = info.citationFormat
+ const citation = info.citationFormat
? info.citationFormat[Object.keys(info.citationFormat).find(key => key.includes('GB/T'))]
: '';
Z.debug(citation);
@@ -253,7 +139,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.pages = metadata('PAGE');
newItem.date = metadata('YE');
info.authors.forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'author'));
+ newItem.creators.push(cleanAuthor(creator.value, 'author'));
});
break;
case 'patent':
@@ -265,7 +151,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.filingDate = metadata('AD');
newItem.issueDate = metadata('PD');
metadata('AU', true).forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'inventor'));
+ newItem.creators.push(cleanAuthor(creator.value, 'inventor'));
});
newItem.rights = metadata('ZQX');
break;
@@ -283,7 +169,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.institution = metadata('AF');
newItem.date = metadata('YE') || metadata('RKNF');
metadata('AU', true).forEach((creator) => {
- newItem.creators.push(processName(creator.value));
+ newItem.creators.push(cleanAuthor(creator.value));
});
extra.set('achievementType', metadata('LBMC'));
break;
@@ -293,7 +179,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.place = metadata('AD');
newItem.pages = metadata('PM');
info.authors.forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'author'));
+ newItem.creators.push(cleanAuthor(creator.value, 'author'));
});
break;
case 'thesis':
@@ -305,10 +191,10 @@ async function scrape(doc, url = doc.location.href) {
newItem.numPages = metadata('PAGEC');
extra.set('major', metadata('SN'));
info.authors.forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'author'));
+ newItem.creators.push(cleanAuthor(creator.value, 'author'));
});
metadata('TUS').split(/[;,;]\s?/).forEach((creator) => {
- newItem.creators.push(processName(creator, 'contributor'));
+ newItem.creators.push(cleanAuthor(creator, 'contributor'));
});
break;
case 'newspaperArticle':
@@ -316,7 +202,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.date = metadata('PT');
newItem.pages = metadata('PV').replace(/0*(\d+)/, '$1');
info.authors.forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'author'));
+ newItem.creators.push(cleanAuthor(creator.value, 'author'));
});
break;
case 'bookSection':
@@ -324,7 +210,7 @@ async function scrape(doc, url = doc.location.href) {
newItem.date = metadata('YE');
newItem.pages = metadata('PPM');
info.authors.forEach((creator) => {
- newItem.creators.push(processName(creator.value.replace(/\s?[总主参]编$/, ''), 'author'));
+ newItem.creators.push(cleanAuthor(creator.value.replace(/\s?[总主参]编$/, ''), 'editor'));
});
break;
case 'statute':
@@ -338,7 +224,7 @@ async function scrape(doc, url = doc.location.href) {
if (!metadata('XLJB').includes('法律')) extra.set('Type', 'regulation', true);
if (metadata('SXX') == '已失效') extra.set('Status', '已废止');
info.aff.forEach((creator) => {
- newItem.creators.push(processName(creator.value, 'author'));
+ newItem.creators.push(cleanAuthor(creator.value, 'author'));
});
break;
}
@@ -356,7 +242,7 @@ async function scrape(doc, url = doc.location.href) {
extra.set('filename', info.fileName);
extra.set('dbname', info.repository.resource.slice(-4));
extra.set('dbcode', info.repository.dataset.substring(4));
- let attachment = info.relations.reverse().find(obj => ['PDF', 'CAJ'].includes(obj.scope));
+ const attachment = info.relations.reverse().find(obj => ['PDF', 'CAJ'].includes(obj.scope));
newItem.attachments.push(attachment
? {
url: attachment.url,
@@ -369,7 +255,7 @@ async function scrape(doc, url = doc.location.href) {
}
);
newItem.tags = metadata('KY', true).map(obj => obj.value);
- let toc = info.co;
+ const toc = info.co;
if (toc) newItem.notes.push('Table of Contents
' + toc);
newItem.extra = extra.toString(newItem.extra);
newItem.complete();
@@ -378,7 +264,7 @@ async function scrape(doc, url = doc.location.href) {
async function scrapeSearch(doi) {
let item = {};
if (!doi) throw new ReferenceError('no identifier available');
- let translator = Zotero.loadTranslator('search');
+ const translator = Zotero.loadTranslator('search');
translator.setSearch({ DOI: doi });
translator.setHandler('translators', (_, translators) => {
translator.setTranslator(translators);
@@ -428,14 +314,6 @@ class Extra {
}
}
-/**
- * Attempts to get the part of the pattern described from the character,
- * and returns an empty string if not match.
- * @param {String} string
- * @param {RegExp} pattern
- * @param {Number} index
- * @returns
- */
function tryMatch(string, pattern, index = 0) {
if (!string) return '';
let match = string.match(pattern);
@@ -444,7 +322,7 @@ function tryMatch(string, pattern, index = 0) {
: '';
}
-function processName(creator, creatorType = 'author') {
+function cleanAuthor(creator, creatorType = 'author') {
creator = ZU.cleanAuthor(creator, creatorType);
if (/[\u4e00-\u9fa5]/.test(creator.lastName)) {
creator.lastName = creator.firstName + creator.lastName;
@@ -1147,5 +1025,4 @@ var testCases = [
]
}
]
-
/** END TEST CASES **/
diff --git a/Douban.js b/Douban.js
index 8392d197..7b1be843 100644
--- a/Douban.js
+++ b/Douban.js
@@ -1,7 +1,7 @@
{
"translatorID": "fc353b26-8911-4c34-9196-f6f567c93901",
"label": "Douban",
- "creator": "不是船长, Ace Strong, Zeping Lee",
+ "creator": "不是船长, Ace Strong, Zeping Lee, jiaojiaodubai",
"target": "^https?://\\w+\\.douban\\.com",
"minVersion": "2.0rc1",
"maxVersion": "",
@@ -9,13 +9,13 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-10-04 22:35:42"
+ "lastUpdated": "2024-11-15 03:16:28"
}
/*
***** BEGIN LICENSE BLOCK *****
- Copyright © 2009-2022 Tao Cheng, Zeping Lee
+ Copyright © 2009-2022 Tao Cheng, Zeping Lee; 2024 jiaojiaodubai
This file is part of Zotero.
@@ -398,9 +398,10 @@ function processName(fullName, defaultType) {
Z.debug(fullName);
const creatorTypMap = {
// https://book.douban.com/subject/34659228/
- author: /[编著绘]+/,
- translator: /翻?译/,
- contributor: /[审校注]+/
+ author: /[著绘]+$/,
+ editor: /编$/,
+ translator: /翻?译$/,
+ contributor: /[审校注]+$/
};
const remark = tryMatch(fullName, /[[((【[](.+)[]】))\]]$/, 1);
fullName = fullName.replace(/[[((【[](.+)[]】))\]]$/, '');
diff --git a/Duxiu.js b/Duxiu.js
index a1931d49..16ad03d2 100644
--- a/Duxiu.js
+++ b/Duxiu.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-10-28 08:36:35"
+ "lastUpdated": "2024-11-15 03:30:15"
}
/*
@@ -119,18 +119,23 @@ async function scrape(doc, url = doc.location.href) {
const extra = new Extra();
const newItem = new Z.Item(detectWeb(doc, url));
newItem.title = ZU.trimInternal(text(doc, '.card_text > dl > dt'));
- newItem.abstractNote = getField(['摘要', '内容提要', '简介']).replace(/\s*隐藏更多$/, '');
+ newItem.abstractNote = getField(['摘要', '内容提要', '简介']).replace(/^: |\s*隐藏更多$/g, '');
const creatorsExt = [];
getField(['作者', '发明人']).split(/[;;]\s*/)
.forEach((group) => {
const creators = group.split(/[,,]\s*/);
- const creatorType = /翻?译$/.test(creators[creators.length - 1])
- ? 'translator'
- : 'author';
+ const role = creators[creators.length - 1];
+ let creatorType = 'author';
+ if (/编纂?$/.test(role)) {
+ creatorType = 'editor';
+ }
+ else if (/翻?译$/.test(role)) {
+ creatorType = 'translator';
+ }
creators.forEach((creator) => {
creator = creator
.replace(/^:/, '')
- .replace(/[主编著翻译\d\s]*$/g, '');
+ .replace(/[主副参][编纂著翻译]+[\d\s]*$/g, '');
const country = tryMatch(creator, /^((.+?))/, 1);
creator = creator.replace(/^(.+?)/, '');
const original = tryMatch(creator, /((.+?))$/, 1);
@@ -378,7 +383,7 @@ var testCases = [
"place": "北京",
"publisher": "机械工业出版社",
"series": "计算机科学丛书 华章教育",
- "url": "https://book.duxiu.com/views/specific/4010/bookDetail.jsp?dxNumber=000030591379",
+ "url": "https://book.duxiu.com/views/specific/4010/bookDetail.jsp?dxNumber=000030591379&d=AAB0ABB8D0C543BFF26956EE9601E809",
"attachments": [],
"tags": [
{
diff --git a/Modern History.js b/Modern History.js
index e71486a0..5d190e1c 100644
--- a/Modern History.js
+++ b/Modern History.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-02-03 10:18:43"
+ "lastUpdated": "2024-11-26 00:52:50"
}
/*
@@ -46,35 +46,35 @@ const typeMap = {
};
function detectUrl(url) {
- let typeKey = Object.keys(typeMap).find(key => url.includes(`_${key}_`));
+ const typeKey = Object.keys(typeMap).find(key => url.includes(`_${key}_`));
if (typeKey) {
- let itemType = typeMap[typeKey];
+ const itemType = typeMap[typeKey];
if (itemType == 'book') {
return /treeId=[^&/]+/.test(url)
? 'bookSection'
: 'book';
}
- else if (itemType == 'document') {
- return /dirCode=[^&/]+/.test(url)
- ? 'document'
+ else if (['document', 'artwork'].includes(itemType)) {
+ return /\/Detailedreading(Video)?\?/i.test(url)
+ ? itemType
: 'multiple';
}
- else if (itemType == 'journalArticle') {
- return /treeId=[^&/]+/.test(url)
- ? 'journalArticle'
- : 'multiple';
+ else if (itemType == 'journalArticle' && /uniqTag=[^&/]+/.test(url)) {
+ return itemType;
+ }
+ else {
+ return itemType;
}
- return itemType;
}
return '';
}
function detectWeb(doc, url) {
- // let tree = doc.querySelector('ul[role="tree"]');
- // if (tree) {
- // Z.monitorDOMChanges(tree, { childList: tree, subtree: true });
- // }
- let itemType = detectUrl(url);
+ const app = doc.querySelector('#app');
+ if (app) {
+ Z.monitorDOMChanges(app, { childList: true, subtree: true });
+ }
+ const itemType = detectUrl(url);
if (itemType) {
return itemType;
}
@@ -85,13 +85,14 @@ function detectWeb(doc, url) {
}
function getSearchResults(doc, checkOnly) {
- var items = {};
- var found = false;
- var rows = doc.querySelectorAll('.items > .item');
- for (let row of rows) {
- let imgUrl = attr(row, Object.keys(typeMap).map(key => `img[src*="_${key}_"]`).join(', '), 'src');
- let fileCode = tryMatch(imgUrl, /\/(\d+_[a-z]{2}_\d+)\//, 1);
- let title = ZU.trimInternal(text(row, 'h5'));
+ const items = {};
+ let found = false;
+ const rows = doc.querySelectorAll('.items > .item');
+ for (const row of rows) {
+ const imgUrl = attr(row, Object.keys(typeMap).map(key => `img[src*="_${key}_"]`).join(', '), 'src');
+ const fileCode = tryMatch(imgUrl, /\/(\d+_[a-z]{2}_\d+)\//, 1);
+ const title = ZU.trimInternal(text(row, 'h5'));
+ // 档案和期刊应在出版物详情中抓取具体篇目
if (!fileCode || !title || /_(da|qk)_/.test(fileCode)) continue;
if (checkOnly) return true;
found = true;
@@ -101,37 +102,42 @@ function getSearchResults(doc, checkOnly) {
}
async function doWeb(doc, url) {
- let fileCode = tryMatch(url, /fileCode=(\d+_[a-z]{2}_\d+)/, 1);
- let itemType = detectWeb(doc, url);
+ const itemType = detectWeb(doc, url);
+ const fileCode = tryMatch(url, /fileCode=(\d+_[a-z]{2}_\d+)/, 1);
if (itemType == 'multiple') {
- var items = {};
+ let items = {};
+ // 位于档案中时,能够获取到fileCode
if (fileCode) {
- let directory = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}`);
- directory = directory.result;
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}`);
+ Z.debug(respond);
+ let directory = respond.result;
+ Z.debug(directory);
let page = text(doc, '.number.active');
if (page) {
page = Number(page);
directory = directory.slice((page - 1) * 10, page * 10);
}
- directory.forEach(item => items[item.directoryCode] = item.label);
- // Z.debug(directory);
+ Z.debug(directory);
+ directory.forEach(dir => items[dir.directoryCode] = dir.label);
items = await Zotero.selectItems(items);
- // Z.debug(items);
if (!items) return;
- for (let dirCode of Object.keys(items)) {
- await scrape(
- fileCode,
- url + `&dirCode=${encodeURIComponent(dirCode)}&treeId=${directory.find(item => item.directoryCode = dirCode).startPageId}`,
- directory
- );
+ for (const dirCode of Object.keys(items)) {
+ const obj = findObj(directory, 'directoryCode', dirCode);
+ url = encodeURI('https://www.modernhistory.org.cn/#/Detailedreading?'
+ + `fileCode=${fileCode}&`
+ + `treeId=${obj.startPageId}&`
+ + `imageUrl=${obj.iiifObj.imgUrl}&`
+ + `dirCode=${obj.directoryCode}&`
+ + `uniqTag=${obj.iiifObj.uniqTag}&`
+ + `contUrl=${obj.iiifObj.jsonUrl}`);
+ await scrape(doc, url, obj);
}
}
+ // 在搜索页面,fileCode来自页面元素
else {
- let items = await Zotero.selectItems(getSearchResults(doc, false));
+ items = await Zotero.selectItems(getSearchResults(doc, false));
if (!items) return;
- for (let fileCode of Object.keys(items)) {
- let itemType = detectUrl(fileCode);
- Z.debug(itemType);
+ for (const fileCode of Object.keys(items)) {
url = 'https://www.modernhistory.org.cn/#/'
+ {
book: 'DocumentDetails_ts_da',
@@ -139,182 +145,198 @@ async function doWeb(doc, url) {
artwork: 'DocumentDetails_tp',
// 尚未明确“hc”是什么意思
videoRecording: 'DocumentDetails_ysp_hc'
- }[itemType]
+ }[detectUrl(fileCode)]
+ `?fileCode=${fileCode}`;
- await scrape(fileCode, url);
+ await scrape(doc, url);
}
}
}
else {
- await scrape(fileCode, url);
+ await scrape(doc, url);
}
}
-async function scrape(fileCode, url, directory) {
- Z.debug(`fileCode: ${fileCode}`);
+async function scrape(doc, url, obj) { // builds and completes one item for `url`; `doc` is attached as the snapshot, `obj` is an optional pre-fetched directory entry
Z.debug(`url: ${url}`);
- Z.debug('director:');
- Z.debug(directory);
- var file = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDetailsInfo/${fileCode}`);
- // let file = fileObj;
- cleanObject(file);
- file = file.result.info;
- Z.debug('file:');
- Z.debug(file);
- var newItem = new Z.Item(detectUrl(url));
+ const fileCode = tryMatch(url, /fileCode=(\d+_[a-z]{2}_\d+)/, 1); // record identifier is read from the URL
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDetailsInfo/${fileCode}`);
+ cleanObject(respond);
+ const info = respond.result.info;
+ Z.debug('info:');
+ Z.debug(info);
+ const newItem = new Z.Item(detectUrl(url)); // the item type is derived from the URL alone
+ const extra = new Extra();
newItem.language = {
汉语: 'zh-CN',
日语: 'jp-JP',
英语: 'en-US'
- }[file.language[0]];
- newItem.url = tryMatch(url, /^.+fileCode=\w+/);
+ }[info.language[0]];
+ newItem.url = url;
newItem.libraryCatalog = '抗日战争与近代中日关系文献数据平台';
- newItem.archive = file.orgName;
- var creators = [
- ...processName(file.firstResponsible, file.firstResponsibleNation, file.firstCreationWay, 'author'),
- ...processName(file.secondResponsible, file.secondResponsibleNation, file.secondCreationWay, 'contributor')
+ newItem.archive = info.orgName;
+ let creators = [
+ ...processNames(info.firstResponsible, info.firstResponsibleNation, info.firstCreationWay, 'author'),
+ ...processNames(info.secondResponsible, info.secondResponsibleNation, info.secondCreationWay, 'contributor')
];
switch (newItem.itemType) {
case 'book': {
- newItem.title = file.title;
- newItem.series = file.seriesVolume;
- newItem.seriesNumber = file.seriesVolume;
- newItem.edition = file.version;
- newItem.place = file.place[0];
- newItem.publisher = file.publisher[0];
- newItem.date = ZU.strToISO(file.publishTime);
- newItem.numPages = file.pageAmount;
+ newItem.title = info.title;
+ newItem.series = info.seriesVolume;
+ newItem.seriesNumber = info.seriesVolume;
+ newItem.edition = info.version;
+ newItem.place = info.place[0];
+ newItem.publisher = info.publisher[0];
+ newItem.date = ZU.strToISO(info.publishTime);
+ newItem.numPages = info.pageAmount;
break;
}
case 'bookSection': {
- if (!directory) {
- directory = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
- Z.debug(directory);
- directory = directory.result;
+ if (!obj) { // no entry supplied by the caller: fetch the directory and look the entry up by the URL's treeId
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
+ obj = findObj(respond.result, 'startPageId', tryMatch(url, /treeId=([^&/]+)/, 1));
}
- let dir = directory.find(child => child.startPageId == tryMatch(url, /treeId=([^&/]+)/, 1));
- // let dir = dirObj;
- cleanObject(dir);
- Z.debug('dir:');
- Z.debug(dir);
- newItem.title = dir.label;
- newItem.bookTitle = file.title;
- newItem.series = file.seriesVolume;
- newItem.seriesNumber = file.seriesVolume;
- newItem.edition = file.version;
- newItem.place = file.place[0];
- newItem.publisher = file.publisher[0];
- newItem.date = ZU.strToISO(file.publishTime);
- newItem.pages = Array.from(new Set([dir.startPage, dir.endPage])).join('-');
+ cleanObject(obj);
+ newItem.title = obj.label;
+ newItem.bookTitle = info.title;
+ newItem.series = info.seriesVolume;
+ newItem.seriesNumber = info.seriesVolume;
+ newItem.edition = info.version;
+ newItem.place = info.place[0];
+ newItem.publisher = info.publisher[0];
+ newItem.date = ZU.strToISO(info.publishTime);
+ newItem.pages = Array.from(new Set([obj.startPage, obj.endPage])).join('-'); // the Set collapses identical start/end pages into a single number
+ creators.forEach((creator) => { // record-level authors are stored as bookAuthor on a section
+ if (creator.creatorType == 'author') {
+ creator.creatorType = 'bookAuthor';
+ }
+ });
break;
}
case 'document': {
- if (!directory) {
- directory = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
- Z.debug(directory);
- directory = directory.result;
+ if (!obj) { // no entry supplied by the caller: look it up by the URL-decoded dirCode
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
+ obj = findObj(respond.result, 'directoryCode', decodeURIComponent(tryMatch(url, /dirCode=([^&/]+)/, 1)));
}
- let dir = directory.find(child => child.directoryCode == decodeURIComponent(tryMatch(url, /dirCode=([^&/]+)/, 1)));
- // let dir = dirObj;
- cleanObject(dir);
- Z.debug('dir:');
- Z.debug(dir);
- newItem.title = dir.label || `${file.title} ${dir.directoryCode}`;
- newItem.abstractNote = file.roundup;
- newItem.place = file.place[0];
- newItem.publisher = file.publisher[0];
- newItem.date = ZU.strToISO(file.publishTime);
- newItem.archiveLocation = file.title + dir.directoryCode;
- extra.add('Type', 'collection', true);
- extra.add('numPages', dir.endPage, true);
+ cleanObject(obj);
+ newItem.title = obj.label || `${info.title} ${obj.directoryCode}`;
+ newItem.abstractNote = info.roundup;
+ newItem.publisher = info.publisher[0];
+ newItem.date = ZU.strToISO(info.publishTime);
+ newItem.archiveLocation = `${info.title}${/^[a-z]/i.test(obj.directoryCode) ? ' ' : ''}${obj.directoryCode}`; // a space is inserted only when the code starts with a Latin letter
+ extra.set('container-title', info.title, true);
+ extra.set('type', 'collection', true);
+ extra.set('place', info.place[0], true);
+ extra.set('numPages', obj.endPage, true);
break;
}
case 'journalArticle': {
- if (!directory) {
+ if (!obj) {
// 在Scaffold中失败
- directory = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
- Z.debug(directory);
- directory = directory.result;
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=${tryMatch(url, /uniqTag=([^&/]+)/, 1)}`);
+ obj = findObj(respond.result, 'directoryCode', tryMatch(url, /dirCode=([^&/]+)/, 1));
}
- let dir = directory.find(child => child.startPageId == tryMatch(url, /treeId=([^&/]+)/, 1));
- // let dir = dirObj;
- cleanObject(dir);
- Z.debug('dir:');
- Z.debug(dir);
- let title = dir.label;
- let names = [];
- let countries = [];
- let role = [];
- tryMatch(title, /【(.+?)】$/, 1).split(',').forEach((creator) => {
- countries.push(tryMatch(creator, /^\((.+?)\)/, 1));
- creator = creator.replace(/^\(.+?\)/, '');
- role.push(tryMatch(creator, /\(?(翻?译)\)?$/, 1));
- creator = creator.replace(/\(?(翻?译|[主原]?[编著])\)?$/, '');
- names.push(creator);
+ cleanObject(obj);
+ const title = obj.label;
+ const names = [];
+ const countries = [];
+ const role = [];
+ tryMatch(title, /【(.+?)】$/, 1).split(',').forEach((name) => { // creators are read from the trailing 【...】 part of the label, comma-separated
+ countries.push(tryMatch(name, /^\((.+?)\)/, 1));
+ name = name.replace(/^\(.+?\)/, '');
+ role.push(tryMatch(name, /\(([^)]+)\)$/, 1));
+ name = name.replace(/\([^)]+\)$/g, '');
+ names.push(name);
});
- creators = processName(names, countries, role, 'author');
+ creators = processNames(names, countries, role, 'author'); // replaces the record-level creators collected before the switch
newItem.title = title.replace(/【.+?】$/, '');
- newItem.publicationTitle = file.title;
- let volumeInfo = dir.iiifObj.volumeInfo;
- newItem.volume = dir.volumeNo || toArabicNum(tryMatch(volumeInfo, /第(.+?)卷/, 1));
+ newItem.publicationTitle = info.title;
+ let volumeInfo = obj.iiifObj.volumeInfo;
+ newItem.volume = obj.volumeNo || toArabicNum(tryMatch(volumeInfo, /第(.+?)卷/, 1));
newItem.issue = toArabicNum(tryMatch(volumeInfo, /第(.+?)[期号]/, 1));
- newItem.pages = Array.from(new Set([dir.startPage, dir.endPage])).join('-');
- newItem.date = ZU.strToISO(file.publishTime) || ZU.strToISO(volumeInfo);
+ newItem.pages = Array.from(new Set([obj.startPage, obj.endPage])).join('-');
+ newItem.date = ZU.strToISO(info.publishTime) || ZU.strToISO(volumeInfo);
break;
}
case 'audioRecording':
- newItem.title = file.title;
- newItem.audioRecordingFormat = file.docFormat;
- newItem.label = file.publisher[0];
- extra.add('place', file.place[0], true);
- extra.add('genre', 'Album', true);
+ newItem.title = info.title;
+ newItem.audioRecordingFormat = info.docFormat;
+ newItem.label = info.publisher[0];
+ extra.set('place', info.place[0], true);
+ extra.set('genre', 'Album', true);
break;
case 'artwork':
- newItem.title = file.title;
- newItem.date = file.timeRange;
+ if (!obj) {
+ const respond = await requestJSON(`https://www.modernhistory.org.cn/backend-prod/esBook/findDirectory/${fileCode}?uniqTag=`);
+ obj = findObj(respond.result, 'directoryCode', tryMatch(url, /dirCode=([^&/]+)/, 1));
+ }
+ newItem.title = obj.label;
+ newItem.date = info.timeRange;
newItem.artworkMedium = 'photography';
- extra.add('amount', file.amount);
+ extra.set('container-title', info.title, true);
+ extra.set('amount', info.amount);
break;
case 'videoRecording':
- newItem.title = file.title;
- newItem.abstractNote = file.notes;
- newItem.videoRecordingFormat = file.docFormat;
- newItem.place = file.place[0];
- newItem.studio = file.publisher[0];
- newItem.date = file.createTimeStr;
- newItem.runningTime = file.duration || ZU.strToISO(file.createTime);
+ newItem.title = info.title;
+ newItem.abstractNote = info.notes;
+ newItem.videoRecordingFormat = info.docFormat;
+ newItem.place = info.place[0];
+ newItem.studio = info.publisher[0];
+ newItem.date = info.createTimeStr;
+ newItem.runningTime = info.duration || ZU.strToISO(info.createTime);
+ extra.set('download', info.download);
break;
}
if (creators.some(creator => creator.country)) {
- extra.add('creatorsExt', JSON.stringify(creators));
+ extra.set('creatorsExt', JSON.stringify(creators)); // serialized before `country` is deleted from each creator below
}
- Z.debug(creators);
creators.forEach((creator) => {
delete creator.country;
newItem.creators.push(creator);
});
- newItem.tags = file.keyWords || [];
+ newItem.tags = info.keyWords || [];
+ newItem.attachments.push({ // snapshot of the document object handed in by the caller
+ title: 'Snapshot',
+ document: doc
+ });
newItem.extra = extra.toString();
newItem.complete();
}
-const extra = {
- clsFields: [],
- elseFields: [],
- add: function (key, value, cls = false) {
- if (value && cls) {
- this.clsFields.push([key, value]);
+class Extra {
+ constructor() {
+ this.fields = [];
+ }
+
+ push(key, val, csl = false) {
+ this.fields.push({ key: key, val: val, csl: csl });
+ }
+
+ set(key, val, csl = false) {
+ let target = this.fields.find(obj => new RegExp(`^${key}$`, 'i').test(obj.key));
+ if (target) {
+ target.val = val;
}
- else if (value) {
- this.elseFields.push([key, value]);
+ else {
+ this.push(key, val, csl);
}
- },
- toString: function () {
- return [...this.clsFields, ...this.elseFields]
- .map(entry => `${entry[0]}: ${entry[1]}`)
- .join('\n');
}
-};
+
+ get(key) {
+ let result = this.fields.find(obj => new RegExp(`^${key}$`, 'i').test(obj.key));
+ return result
+ ? result.val
+ : '';
+ }
+
+ toString(history = '') {
+ this.fields = this.fields.filter(obj => obj.val);
+ return [
+ this.fields.filter(obj => obj.csl).map(obj => `${obj.key}: ${obj.val}`).join('\n'),
+ history,
+ this.fields.filter(obj => !obj.csl).map(obj => `${obj.key}: ${obj.val}`).join('\n')
+ ].filter(obj => obj).join('\n');
+ }
+}
function tryMatch(string, pattern, index = 0) {
let match = string.match(pattern);
@@ -323,20 +345,34 @@ function tryMatch(string, pattern, index = 0) {
: '';
}
-function processName(names, countries, role, creatorType) {
+function processNames(names, countries, role, creatorType) {
+ const creators = [];
names = names || [];
countries = countries || [];
role = role || [];
for (let i = 0; i < names.length; i++) {
- names[i] = {
- firstName: '',
- lastName: names[i],
- creatorType: /[翻译]+/.test(role[i]) ? 'translator' : creatorType,
- fieldMode: 1,
- country: countries[i] || ''
- };
+ const creatorTypes = [];
+ if (/翻?译/.test(role[i])) {
+ creatorTypes.push('translator');
+ }
+ if (/[编辑纂]+|记/.test(role[i])) {
+ creatorTypes.push('editor');
+ }
+ if (!creatorTypes.length) {
+ creatorTypes.push(creatorType);
+ }
+ for (const type of creatorTypes) {
+ creators.push({
+ firstName: '',
+ // https://www.modernhistory.org.cn/#/Detailedreading?docType=qk&fileCode=9999_qk_05009&treeId=105495871&qkTitle=No.2%283%E6%9C%88%29&uniqTag=9999_qk_05009_0002&dirCode=ec97be75bf774836a1244e98686ca1bf&contUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_qk_05009%2F9999_qk_05009_0002%2F9999_qk_05009_0002.json
+ lastName: names[i].replace(/\.\s*/, '. '),
+ creatorType: type,
+ fieldMode: 1,
+ country: countries[i] || ''
+ });
+ }
}
- return names;
+ return creators;
}
function toArabicNum(zhNum) {
@@ -405,11 +441,25 @@ function cleanObject(object) {
}
}
+function findObj(tree, attribute, value) {
+ for (const branch of tree) {
+ if (branch[attribute] == value) {
+ return branch;
+ }
+ else {
+ const result = findObj(branch.children, attribute, value);
+ if (result) return result;
+ }
+ }
+ return undefined;
+}
+
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "https://www.modernhistory.org.cn/#/DocumentDetails_ts_da?fileCode=9999_ts_00553714&title=%E8%BE%A9%E8%AF%81%E5%94%AF%E7%89%A9%E4%B8%BB%E4%B9%89%E4%B8%8E%E5%8E%86%E5%8F%B2%E5%94%AF%E7%89%A9%E4%B8%BB%E4%B9%89&flag=false",
+ "defer": true,
"items": [
{
"itemType": "book",
@@ -436,8 +486,13 @@ var testCases = [
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
"numPages": "61",
"place": "上海",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ts_da?fileCode=9999_ts_00553714",
- "attachments": [],
+ "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ts_da?fileCode=9999_ts_00553714&title=%E8%BE%A9%E8%AF%81%E5%94%AF%E7%89%A9%E4%B8%BB%E4%B9%89%E4%B8%8E%E5%8E%86%E5%8F%B2%E5%94%AF%E7%89%A9%E4%B8%BB%E4%B9%89&flag=false",
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
"tags": [
{
"tag": "历史唯物主义"
@@ -457,6 +512,7 @@ var testCases = [
{
"type": "web",
"url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_ts_00316571&treeId=145458905&contUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_ts_00316571%2F9999_ts_00316571.json",
+ "defer": true,
"items": [
{
"itemType": "bookSection",
@@ -465,13 +521,13 @@ var testCases = [
{
"firstName": "",
"lastName": "马克思",
- "creatorType": "author",
+ "creatorType": "bookAuthor",
"fieldMode": 1
},
{
"firstName": "",
"lastName": "恩格斯",
- "creatorType": "author",
+ "creatorType": "bookAuthor",
"fieldMode": 1
},
{
@@ -484,13 +540,18 @@ var testCases = [
"date": "1949-01-01",
"archive": "社会来源",
"bookTitle": "艺术的真实",
- "extra": "creatorsExt: [{\"firstName\":\"\",\"lastName\":\"马克思\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"德国\"},{\"firstName\":\"\",\"lastName\":\"恩格斯\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"\"},{\"firstName\":\"\",\"lastName\":\"郭沫若\",\"creatorType\":\"translator\",\"fieldMode\":1,\"country\":\"\"}]",
+ "extra": "creatorsExt: [{\"firstName\":\"\",\"lastName\":\"马克思\",\"creatorType\":\"bookAuthor\",\"fieldMode\":1,\"country\":\"德国\"},{\"firstName\":\"\",\"lastName\":\"恩格斯\",\"creatorType\":\"bookAuthor\",\"fieldMode\":1,\"country\":\"\"},{\"firstName\":\"\",\"lastName\":\"郭沫若\",\"creatorType\":\"translator\",\"fieldMode\":1,\"country\":\"\"}]",
"language": "zh-CN",
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
"pages": "9-16",
"place": "上海",
- "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_ts_00316571",
- "attachments": [],
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_ts_00316571&treeId=145458905&contUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_ts_00316571%2F9999_ts_00316571.json",
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
"tags": [
{
"tag": "具体与抽象"
@@ -509,39 +570,42 @@ var testCases = [
},
{
"type": "web",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ysp_hc?fileCode=0002_sp_00000001&title=%E4%B8%9C%E4%BA%AC%E5%AE%A1%E5%88%A4&flag=false",
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?docType=qk&fileCode=9999_qk_05012&treeId=106127676&qkTitle=%E7%AC%AC%E4%BA%8C%E5%8D%B7%E7%AC%AC%E5%9B%9B%E5%8F%B7%285%E6%9C%8820%E6%97%A5%29&uniqTag=9999_qk_05012_0010&dirCode=c3dc322a36e74ec99019e5a9c1689994&contUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_qk_05012%2F9999_qk_05012_0002%2F9999_qk_05012_0002.json",
+ "defer": true,
"items": [
{
- "itemType": "videoRecording",
- "title": "东京审判",
+ "itemType": "journalArticle",
+ "title": "新兴艺术概况",
"creators": [
{
"firstName": "",
- "lastName": "东京国际军事法庭",
- "creatorType": "author",
+ "lastName": "冯宪章",
+ "creatorType": "translator",
+ "fieldMode": 1
+ },
+ {
+ "firstName": "",
+ "lastName": "冯宪章",
+ "creatorType": "editor",
"fieldMode": 1
}
],
- "date": "1946",
- "abstractNote": "本数据经上海交通大学出版社授权发布,仅供学术研究使用用。以任何形式用于商业目的,请务必与版权方联系。",
- "archive": "上海交通大学东京审判研究中心",
+ "date": "1930-05-20",
+ "archive": "社会来源",
+ "issue": 4,
"language": "zh-CN",
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
- "runningTime": "6:43:31",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ysp_hc?fileCode=0002_sp_00000001",
- "videoRecordingFormat": "mp4",
- "attachments": [],
- "tags": [
- {
- "tag": "东京审判"
- },
+ "pages": "452",
+ "publicationTitle": "大众文艺",
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?docType=qk&fileCode=9999_qk_05012&treeId=106127676&qkTitle=%E7%AC%AC%E4%BA%8C%E5%8D%B7%E7%AC%AC%E5%9B%9B%E5%8F%B7%285%E6%9C%8820%E6%97%A5%29&uniqTag=9999_qk_05012_0010&dirCode=c3dc322a36e74ec99019e5a9c1689994&contUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_qk_05012%2F9999_qk_05012_0002%2F9999_qk_05012_0002.json",
+ "volume": 2,
+ "attachments": [
{
- "tag": "伪满洲国"
- },
- {
- "tag": "关东军"
+ "title": "Snapshot",
+ "mimeType": "text/html"
}
],
+ "tags": [],
"notes": [],
"seeAlso": []
}
@@ -550,6 +614,7 @@ var testCases = [
{
"type": "web",
"url": "https://www.modernhistory.org.cn/#/DetailedreadingVideo?docType=yp&id=71729&fileCode=6666_yp_00000022&treeId=134",
+ "defer": true,
"items": [
{
"itemType": "audioRecording",
@@ -574,8 +639,13 @@ var testCases = [
"label": "中国唱片深圳公司",
"language": "zh-CN",
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
- "url": "https://www.modernhistory.org.cn/#/DetailedreadingVideo?docType=yp&id=71729&fileCode=6666_yp_00000022",
- "attachments": [],
+ "url": "https://www.modernhistory.org.cn/#/DetailedreadingVideo?docType=yp&id=71729&fileCode=6666_yp_00000022&treeId=134",
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
"tags": [],
"notes": [],
"seeAlso": []
@@ -584,11 +654,12 @@ var testCases = [
},
{
"type": "web",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_tp?fileCode=9999_tp_00000006&title=%E9%A6%99%E6%B8%AF%E5%8D%8E%E8%8A%B3%E7%85%A7%E7%9B%B8%E9%A6%86%EF%BC%88AFong%EF%BC%89%E6%91%84%E5%BD%B1%E9%9B%86&flag=false",
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_tp_00000006&treeId=196970787&imageUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_tp_00000006%2F9999_tp_00000006_00002.jpg&dirCode=1b5e6954e5204328bc849743600d66ec",
+ "defer": true,
"items": [
{
"itemType": "artwork",
- "title": "香港华芳照相馆(AFong)摄影集",
+ "title": "自南向北拍摄的通州码头",
"creators": [
{
"firstName": "",
@@ -600,10 +671,49 @@ var testCases = [
"date": "1879",
"archive": "社会来源",
"artworkMedium": "photography",
- "extra": "amount: 95\ncreatorsExt: [{\"firstName\":\"\",\"lastName\":\"香港华芳照相馆\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"中国\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"contributor\",\"fieldMode\":1,\"country\":\"\"}]",
+ "extra": "container-title: 香港华芳照相馆(AFong)摄影集\namount: 95\ncreatorsExt: [{\"firstName\":\"\",\"lastName\":\"香港华芳照相馆\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"中国\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"contributor\",\"fieldMode\":1,\"country\":\"\"}]",
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_tp?fileCode=9999_tp_00000006",
- "attachments": [],
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_tp_00000006&treeId=196970787&imageUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_tp_00000006%2F9999_tp_00000006_00002.jpg&dirCode=1b5e6954e5204328bc849743600d66ec",
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
+ "tags": [],
+ "notes": [],
+ "seeAlso": []
+ }
+ ]
+ },
+ {
+ "type": "web",
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_tp_00000005&treeId=196970737&imageUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_tp_00000005%2F9999_tp_00000005_00002.jpg&dirCode=99435ef71d6640c689ec4886d96cd8d0",
+ "defer": true,
+ "items": [
+ {
+ "itemType": "artwork",
+ "title": "在雅宾利道自南向北拍摄维多利亚湾",
+ "creators": [
+ {
+ "firstName": "",
+ "lastName": "威廉·普瑞尔·弗洛伊德",
+ "creatorType": "author",
+ "fieldMode": 1
+ }
+ ],
+ "date": "1873",
+ "archive": "社会来源",
+ "artworkMedium": "photography",
+ "extra": "container-title: 威廉·普瑞尔·弗洛伊德(William Pryor Floyd)摄影集\namount: 50\ncreatorsExt: [{\"firstName\":\"\",\"lastName\":\"威廉·普瑞尔·弗洛伊德\",\"creatorType\":\"author\",\"fieldMode\":1,\"country\":\"英国\"},{\"firstName\":\"\",\"lastName\":\"\",\"creatorType\":\"contributor\",\"fieldMode\":1,\"country\":\"\"}]",
+ "libraryCatalog": "抗日战争与近代中日关系文献数据平台",
+ "url": "https://www.modernhistory.org.cn/#/Detailedreading?fileCode=9999_tp_00000005&treeId=196970737&imageUrl=https%3A%2F%2Fkrwxk-prod.oss-cn-beijing.aliyuncs.com%2F9999_tp_00000005%2F9999_tp_00000005_00002.jpg&dirCode=99435ef71d6640c689ec4886d96cd8d0",
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
"tags": [],
"notes": [],
"seeAlso": []
@@ -613,6 +723,7 @@ var testCases = [
{
"type": "web",
"url": "https://www.modernhistory.org.cn/#/DocumentDetails_ysp_hc?fileCode=0002_sp_00000001&title=%E4%B8%9C%E4%BA%AC%E5%AE%A1%E5%88%A4&flag=false",
+ "defer": true,
"items": [
{
"itemType": "videoRecording",
@@ -631,9 +742,14 @@ var testCases = [
"language": "zh-CN",
"libraryCatalog": "抗日战争与近代中日关系文献数据平台",
"runningTime": "6:43:31",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ysp_hc?fileCode=0002_sp_00000001",
+ "url": "https://www.modernhistory.org.cn/#/DocumentDetails_ysp_hc?fileCode=0002_sp_00000001&title=%E4%B8%9C%E4%BA%AC%E5%AE%A1%E5%88%A4&flag=false",
"videoRecordingFormat": "mp4",
- "attachments": [],
+ "attachments": [
+ {
+ "title": "Snapshot",
+ "mimeType": "text/html"
+ }
+ ],
"tags": [
{
"tag": "东京审判"
@@ -652,12 +768,20 @@ var testCases = [
},
{
"type": "web",
- "url": "https://www.modernhistory.org.cn/#/DocumentDetails_da?fileCode=0017_da_0006&title=%E8%A5%BF%E5%8D%97%E5%A4%AA%E5%B9%B3%E6%B4%8B%E6%88%98%E5%8C%BA%E6%97%A5%E6%9C%AC%E6%88%98%E4%BA%89%E5%AB%8C%E7%8A%AF%E8%AE%AF%E9%97%AE%E6%A1%A3%E6%A1%88&flag=false",
+ "url": "https://www.modernhistory.org.cn/#/SearchResult_list?searchValue=%E6%B0%91%E4%BF%97&seniorType=&selectType=",
+ "defer": true,
"items": "multiple"
},
{
"type": "web",
- "url": "https://www.modernhistory.org.cn/#/SearchResult_list?searchValue=%E6%B0%91%E4%BF%97&seniorType=&selectType=",
+ "url": "https://www.modernhistory.org.cn/#/DocumentDetails_tp?fileCode=9999_tp_00000006&title=%E9%A6%99%E6%B8%AF%E5%8D%8E%E8%8A%B3%E7%85%A7%E7%9B%B8%E9%A6%86%EF%BC%88AFong%EF%BC%89%E6%91%84%E5%BD%B1%E9%9B%86&flag=false",
+ "defer": true,
+ "items": "multiple"
+ },
+ {
+ "type": "web",
+ "url": "https://www.modernhistory.org.cn/#/DocumentDetails_da?fileCode=1003_da_0005&title=%E5%9F%83%E6%96%87%E6%96%AF%C2%B7%E5%8D%A1%E5%B0%94%E9%80%8A%E8%97%8F%E6%8A%97%E6%88%98%E5%8F%B2%E6%96%99%E6%A1%A3%E6%A1%88&flag=false",
+ "defer": true,
"items": "multiple"
}
]
diff --git a/PatentStar.js b/PatentStar.js
index f1928d21..c98b4d34 100644
--- a/PatentStar.js
+++ b/PatentStar.js
@@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
- "lastUpdated": "2024-07-23 14:21:25"
+ "lastUpdated": "2024-11-26 05:51:41"
}
/*
@@ -50,12 +50,12 @@ function detectWeb(doc, url) {
}
function getSearchResults(doc, checkOnly) {
- var items = {};
- var found = false;
- var rows = doc.querySelectorAll('label.title-color');
- for (let row of rows) {
- let ane = row.getAttribute('data-ane') || row.getAttribute('data-pne');
- let title = ZU.trimInternal(row.getAttribute('title'));
+ const items = {};
+ let found = false;
+ const rows = doc.querySelectorAll('label.title-color');
+ for (const row of rows) {
+ const ane = row.getAttribute('data-ane') || row.getAttribute('data-pne');
+ const title = ZU.trimInternal(row.getAttribute('title'));
if (!ane || !title) continue;
if (checkOnly) return true;
found = true;
@@ -66,10 +66,10 @@ function getSearchResults(doc, checkOnly) {
async function doWeb(doc, url) {
if (detectWeb(doc, url) == 'multiple') {
- let items = await Zotero.selectItems(getSearchResults(doc, false));
+ const items = await Zotero.selectItems(getSearchResults(doc, false));
if (!items) return;
- for (let ane of Object.keys(items)) {
- let href = `/Search/Detail?ANE=${ane}`;
+ for (const ane of Object.keys(items)) {
+ const href = `/Search/Detail?ANE=${ane}`;
await scrape(await requestDocument(href), href);
}
}
@@ -119,7 +119,7 @@ async function scrape(doc, url) {
2: "实用新型专利",
3: "外观专利"
}[jsonData.PT];
- jsonData.IN.split(/\s?;/).forEach(creator => newItem.creators.push(processName(creator)));
+ jsonData.IN.split(/\s?;/).forEach(creator => newItem.creators.push(cleanAuthor(creator)));
}
catch (error) {
Z.debug(error);
@@ -139,7 +139,7 @@ async function scrape(doc, url) {
newItem.legalStatus = labels.get('当前状态');
newItem.rights = text(doc, '.item-summary > p:nth-child(2) > strong+span');
labels.get('发明人', true).querySelectorAll('a').forEach((element) => {
- newItem.creators.push(processName(ZU.trimInternal(element.textContent)));
+ newItem.creators.push(cleanAuthor(ZU.trimInternal(element.textContent)));
});
}
@@ -242,10 +242,9 @@ function toISODate(str) {
return str.replace(/^(\d{4})(\d{2})/, "$1-$2-");
}
-function processName(creator) {
- let creatorType = creator.endsWith('指导') ? 'contributor' : 'inventor';
- creator = ZU.cleanAuthor(creator.replace(/[等主编著;]*$/, ''), creatorType);
- if (/[\u4e00-\u9fa5]/.test(creator.lastName)) {
+function cleanAuthor(name) {
+ const creator = ZU.cleanAuthor(name.replace(/指导$/, ''), name.endsWith('指导') ? 'contributor' : 'inventor');
+ if (/[\u4e00-\u9fff]/.test(creator.lastName)) {
creator.lastName = creator.firstName + creator.lastName;
creator.firstName = '';
creator.fieldMode = 1;