Skip to content

Commit f4761c8

Browse files
committed
add Encore Library Catalogs.
1 parent 3496e3e commit f4761c8

File tree

1 file changed

+307
-0
lines changed

1 file changed

+307
-0
lines changed

Library Catalog (Encore).js

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
{
2+
"translatorID": "446764bf-7da6-49ec-b7a7-fefcbafe317f",
3+
"label": "Library Catalog (Encore)",
4+
"creator": "Sebastian Karcher",
5+
"target": "/iii/encore/(record|search)",
6+
"minVersion": "1.0",
7+
"maxVersion": "",
8+
"priority": 100,
9+
"inRepository": true,
10+
"translatorType": 4,
11+
"browserSupport": "g",
12+
"lastUpdated": "2011-10-23 20:45:22"
13+
}
14+
15+
/*
16+
Encore Library Catalog Translator
17+
Copyright (C) 2011 Sebastian Karcher and CHNM
18+
19+
This program is free software: you can redistribute it and/or modify
20+
it under the terms of the GNU General Public License as published by
21+
the Free Software Foundation, either version 3 of the License, or
22+
(at your option) any later version.
23+
24+
This program is distributed in the hope that it will be useful,
25+
but WITHOUT ANY WARRANTY; without even the implied warranty of
26+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27+
GNU General Public License for more details.
28+
29+
You should have received a copy of the GNU General Public License
30+
along with this program. If not, see <http://www.gnu.org/licenses/>.
31+
*/
32+
33+
34+
/**
 * Decide what kind of Encore page is being shown, using only its URL.
 *
 * @param {Document} doc - The page document. It is not inspected.
 * @param {String} url - The address of the page.
 * @return {String|undefined} "book" when the URL contains "encore/record",
 *         "multiple" when it contains "encore/search", undefined otherwise.
 */
function detectWeb(doc, url) {
	// Record pages are tested first, so a URL that matches both patterns is
	// treated as a single record.
	if (/encore\/record/.test(url)) {
		return "book";
	}

	if (/encore\/search/.test(url)) {
		return "multiple";
	}
}
50+
51+
52+
53+
54+
/**
 * Add the marcData=Y flag to a catalog URL. The pages fetched with this flag
 * are the ones scrape() later parses as MARC text.
 *
 * @param {String} url - Record URL, with or without a query string.
 * @return {String} The URL with marcData=Y as its first query parameter.
 */
function addMarcDataParam(url) {
	if (url.indexOf("?") != -1) {
		return url.replace("?", "?marcData=Y&");
	}

	// No query string yet: start one, keeping any fragment at the very end.
	var hashAt = url.indexOf("#");
	if (hashAt == -1) {
		return url + "?marcData=Y";
	}
	return url.substring(0, hashAt) + "?marcData=Y" + url.substring(hashAt);
}

/**
 * Entry point called by Zotero to save the item(s) on the current page.
 *
 * On a record page the MARC view of that record is fetched. On a search
 * results page the result titles are collected, offered to the user through
 * Zotero.selectItems(), and the MARC view of every chosen record is fetched.
 *
 * @param {Document} doc - The page document.
 * @param {String} url - The address of the page.
 */
function doWeb(doc, url) {
	var uri = doc.location.href;
	// load translator for MARC
	var translator = Zotero.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	translator.getTranslatorObject(function(marc) {
		if (detectWeb(doc, url) == "book") {
			pageByPage(marc, [addMarcDataParam(uri)]);
			return;
		}

		// Search results page
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == 'x') return namespace; else return null;
		} : null;

		// Maps record URL -> title; a plain object because the keys are strings.
		var availableItems = {};

		var tableRows = doc.evaluate('//td[@class="browseResultContent" or @class="itemTitleCell"] ',
			doc, nsResolver, XPathResult.ANY_TYPE, null);
		// Go through table rows
		var tableRow;
		while (tableRow = tableRows.iterateNext()) {
			// get link
			var links = doc.evaluate('.//*[@class="dpBibTitle"]/a', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
			var link = links.iterateNext();

			// Skip rows without a title link and rows whose first link is already listed.
			if (!link || availableItems[link.href]) {
				continue;
			}

			// Go through links, keeping only those with some visible text
			while (link) {
				if (link.textContent.match(/\w+/)) availableItems[link.href] = link.textContent;
				link = links.iterateNext();
			}
		}

		var items = Zotero.selectItems(availableItems);
		if (!items) {
			// NOTE(review): nothing is fetched on this path, so Zotero.done() is
			// never reached through pageByPage() - confirm the host copes with that.
			return true;
		}

		var newUrls = [];
		for (var itemURL in items) {
			newUrls.push(addMarcDataParam(itemURL));
		}
		pageByPage(marc, newUrls);
	});

	Zotero.wait();
}
117+
118+
119+
120+
//functions:
121+
/**
 * Finish one MARC field and store it on the record.
 *
 * A "|" followed by a character is rewritten to marc.subfieldDelimiter
 * followed by that character; a value that does not start with the delimiter
 * is filed under subfield "a".
 */
function addMarcField(marc, record, tag, ind, rawValue) {
	var fieldValue = rawValue.replace(/\|(.)/g, marc.subfieldDelimiter + "$1");
	if (fieldValue[0] != marc.subfieldDelimiter) {
		fieldValue = marc.subfieldDelimiter + "a" + fieldValue;
	}
	record.addField(tag, ind, fieldValue);
}

/**
 * Turn every <pre> block of a MARC display page into a Zotero item.
 *
 * Each block is read line by line: characters 0-2 are the tag, 4-5 the
 * indicators and everything from character 7 on is the value. A line that
 * starts with six blanks continues the previous field, and a line starting
 * with "LEADER" supplies the record leader. The assembled record is handed
 * to the MARC translator, then the item is tidied up and completed.
 *
 * @param {Object} marc - Translator object of the MARC import translator;
 *        this function uses marc.record and marc.subfieldDelimiter.
 * @param {Document} newDoc - The loaded MARC display page.
 */
function scrape(marc, newDoc) {
	var namespace = newDoc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	// Prefer the text nodes inside <pre>; fall back to the <pre> elements
	// themselves when that query finds nothing.
	var xpath = '//pre/text()';
	var elmts;
	var useNodeValue;
	if (newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
		// The probe above consumed a node, so run the query again from the start.
		elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		useNodeValue = true;
	} else {
		elmts = newDoc.evaluate('//pre', newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		useNodeValue = false;
	}

	var elmt;
	while (elmt = elmts.iterateNext()) {
		var text = useNodeValue ? elmt.nodeValue : elmt.textContent;
		var newItem = new Zotero.Item();
		var record = new marc.record();

		// Field being assembled. Reset for every record so that nothing
		// carries over from the previous <pre> block.
		var tag = null;
		var ind = null;
		var tagValue = null;

		var lines = text.split("\n");
		for (var i = 0; i < lines.length; i++) {
			if (!lines[i]) {
				continue;
			}

			// Non-breaking spaces, underscores and tabs all count as blanks.
			var line = lines[i].replace(/[\xA0_\t]/g, " ");
			var value = line.substr(7);

			if (/^ {6}/.test(line)) {
				// add this onto previous value (ignored when no field is open)
				if (tagValue !== null) {
					tagValue += value;
				}
			} else if (line.substr(0, 6) == "LEADER") {
				// trap leader
				record.leader = value;
			} else {
				if (tagValue) { // finish last tag
					addMarcField(marc, record, tag, ind, tagValue);
				}

				tag = line.substr(0, 3);
				ind = line.substr(4, 2);
				tagValue = value;
			}
		}
		if (tagValue) {
			// add the final tag of the record
			addMarcField(marc, record, tag, ind, tagValue);
		}

		record.translate(newItem);

		// the library catalogue name isn't perfect, but should be unambiguous.
		var domain = newDoc.location.href.match(/https?:\/\/([^/]+)/);
		if (domain) {
			newItem.repository = domain[1].replace(/encore\./, "") + " Library Catalog";
		}
		// there is too much stuff in the note field - or file this as an abstract?
		newItem.notes = [];

		// editors get mapped as contributors - but so do many others who should be
		// --> for books that don't have an author, turn contributors into editors.
		if (newItem.itemType == "book") {
			var creators = newItem.creators || [];
			var hasAuthor = false;
			for (var j = 0; j < creators.length; j++) {
				if (creators[j].creatorType == "author") {
					hasAuthor = true;
					break;
				}
			}
			if (!hasAuthor) {
				for (var k = 0; k < creators.length; k++) {
					if (creators[k].creatorType == "contributor") {
						creators[k].creatorType = "editor";
					}
				}
			}
		}

		newItem.complete();
	}
}
211+
212+
/**
 * Hand the given URLs to Zotero.Utilities.processDocuments().
 *
 * @param {Object} marc - MARC translator object, passed through to scrape().
 * @param {String[]} urls - Addresses of the pages to load.
 */
function pageByPage(marc, urls) {
	// Called with each loaded document.
	var handleDocument = function(newDoc) {
		scrape(marc, newDoc);
	};

	// Passed as the completion callback: tells Zotero the translation is finished.
	var finish = function() {
		Zotero.done();
	};

	Zotero.Utilities.processDocuments(urls, handleDocument, finish);
}
217+
218+
/** BEGIN TEST CASES **/
219+
var testCases = [
220+
{
221+
"type": "web",
222+
"url": "http://encore.tulsalibrary.org/iii/encore/record/C__Rb1951305__Sthelen__P0%2C8__Orightresult__X3?lang=eng&suite=cobalt",
223+
"items": [
224+
{
225+
"itemType": "book",
226+
"creators": [
227+
{
228+
"firstName": "Scott Douglas",
229+
"lastName": "Gerber",
230+
"creatorType": "editor"
231+
}
232+
],
233+
"notes": [],
234+
"tags": [
235+
"United States",
236+
"Politics and government",
237+
"1775-1783",
238+
"United States",
239+
"Politics and government",
240+
"1783-1865",
241+
"United States",
242+
"Politics and government Philosophy"
243+
],
244+
"seeAlso": [],
245+
"attachments": [],
246+
"ISBN": "1568027052",
247+
"title": "The Declaration of Independence: origins and impact",
248+
"place": "Washington, D.C",
249+
"publisher": "CQ Press",
250+
"date": "2002",
251+
"numPages": "347",
252+
"series": "Landmark events in U.S. history series",
253+
"callNumber": "E221 .D35 2002",
254+
"libraryCatalog": "tulsalibrary.org Library Catalog",
255+
"shortTitle": "The Declaration of Independence"
256+
}
257+
]
258+
},
259+
{
260+
"type": "web",
261+
"url": "http://encore.tulsalibrary.org/iii/encore/record/C__Rb1653320__Sthelen__P0%2C2__Orightresult__X4?lang=eng&suite=cobalt",
262+
"items": [
263+
{
264+
"itemType": "book",
265+
"creators": [
266+
{
267+
"firstName": "Sherry",
268+
"lastName": "Shahan",
269+
"creatorType": "author"
270+
},
271+
{
272+
"firstName": "Mary",
273+
"lastName": "Thelen",
274+
"creatorType": "contributor"
275+
}
276+
],
277+
"notes": [],
278+
"tags": [
279+
"Jazz",
280+
"Musical instruments",
281+
"Alphabet"
282+
],
283+
"seeAlso": [],
284+
"attachments": [],
285+
"ISBN": "0399234535",
286+
"title": "The jazzy alphabet",
287+
"place": "New York",
288+
"publisher": "Philomel Books",
289+
"date": "2002",
290+
"numPages": "1",
291+
"callNumber": "Sha",
292+
"libraryCatalog": "tulsalibrary.org Library Catalog"
293+
}
294+
]
295+
},
296+
{
297+
"type": "web",
298+
"url": "http://encore.colorado.edu/iii/encore/search?formids=target&lang=eng&suite=def&reservedids=lang%2Csuite&submitmode=&submitname=&target=thelen&Search.x=0&Search.y=0",
299+
"items": "multiple"
300+
},
301+
{
302+
"type": "web",
303+
"url": "http://encore.coalliance.org/iii/encore/search/C|Sthelen|Orightresult|U1?lang=eng",
304+
"items": "multiple"
305+
}
306+
]
307+
/** END TEST CASES **/

0 commit comments

Comments
 (0)