Skip to content

Commit 8594b37

Browse files
sitemap
1 parent 41cf470 commit 8594b37

File tree

7 files changed

+199
-0
lines changed

7 files changed

+199
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Generated sitemaps
2+
*.xml
3+
14
# Logs
25
logs
36
*.log

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
# phaidra-ir-utils
22
support tools for phaidra-ir
3+
4+
# usage
5+
npm run start

index.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Set options as a parameter, environment variable, or rc file.
// CommonJS entry point: rebinds `require` to the loader returned by the `esm`
// package so that main.js, which is written with ES module `import` syntax,
// can be loaded from here.
2+
require = require("esm")(module/*, options*/)
3+
// Expose whatever main.js exports as this module's exports.
module.exports = require("./main.js")

main.js

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import fs from 'fs'
2+
import qs from 'qs'
3+
import axios from 'axios'
4+
import config from './phaidra-ir'
5+
6+
/**
 * Builds the XML text of a single sitemap file (a <urlset> document).
 *
 * @param {Iterable<{loc: string, lastmod?: string}>} urls - entries to list;
 *   `lastmod` is emitted only when it is truthy.
 * @returns {string} the complete XML document, terminated by a newline.
 */
function getSitemapXml(urls) {
  // The sitemap protocol requires all data values to be entity-escaped;
  // an unescaped '&' or '<' in a URL would make the whole file invalid XML.
  const escapeXml = (value) =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;')

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  ]
  for (const u of urls) {
    lines.push(' <url>')
    lines.push(' <loc>' + escapeXml(u.loc) + '</loc>')
    if (u.lastmod) {
      lines.push(' <lastmod>' + escapeXml(u.lastmod) + '</lastmod>')
    }
    lines.push(' </url>')
  }
  lines.push('</urlset>')
  return lines.join('\n') + '\n'
}
22+
23+
/**
 * Builds the XML text of a sitemap index (a <sitemapindex> document) that
 * points at the individual sitemap files.
 *
 * @param {Iterable<{loc: string, lastmod?: string}>} files - sitemap files to
 *   reference; `lastmod` is emitted only when it is truthy.
 * @returns {string} the complete XML document, terminated by a newline.
 */
function getIndexfileXml(files) {
  // Values must be entity-escaped so that characters such as '&' or '<'
  // cannot break the XML structure.
  const escapeXml = (value) =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;')

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  ]
  for (const f of files) {
    lines.push(' <sitemap>')
    lines.push(' <loc>' + escapeXml(f.loc) + '</loc>')
    if (f.lastmod) {
      lines.push(' <lastmod>' + escapeXml(f.lastmod) + '</lastmod>')
    }
    lines.push(' </sitemap>')
  }
  lines.push('</sitemapindex>')
  return lines.join('\n') + '\n'
}
39+
40+
/**
 * Fetches all documents of the configured collection from Solr and writes
 * numbered sitemap files (sitemap1.xml, sitemap2.xml, ...) plus a sitemap
 * index (sitemap.xml) into config.sitemapfolder.
 *
 * Reads config.ircollection, config.solr, config.baseurl and
 * config.sitemapfolder. If the Solr request fails, the error is logged and
 * nothing is written, so previously generated files are left untouched.
 *
 * @returns {Promise<void>} resolves once all files are written (or the
 *   request failed); rejects if writing a file throws.
 */
async function createSitemap(){
  const params = {
    q: '*:*',
    fq: 'ispartof:\"' + config.ircollection + '\"',
    defType: 'edismax',
    wt: 'json',
    start: 0,
    rows: '99999',
    sort: 'modified desc'
  }

  let docs
  let total
  try {
    const response = await axios.request({
      method: 'POST',
      url: config.solr + '/select',
      data: qs.stringify(params, { arrayFormat: 'repeat' }),
      headers: {
        'content-type': 'application/x-www-form-urlencoded'
      }
    })
    docs = response.data.response.docs
    total = response.data.response.numFound
  } catch (error) {
    // Stop here: carrying on would overwrite an existing sitemap index
    // with an empty one.
    console.error(error)
    return
  }

  // Only a single page of `rows` documents is requested, so anything beyond
  // that would be missing from the sitemap without notice.
  if (total > docs.length) {
    console.warn('solr reported ' + total + ' documents but returned only ' + docs.length + ', sitemap will be incomplete')
  }

  const indexFiles = []
  let urls = []
  let latestModified = '1970-01-01'

  // Writes the collected urls as the next numbered sitemap file, records it
  // for the index and resets the per-file accumulators.
  const writeSitemapFile = () => {
    const xml = getSitemapXml(urls)
    const filename = 'sitemap' + (indexFiles.length + 1) + '.xml'
    console.log('creating sitemap file ' + filename)
    fs.writeFileSync(config.sitemapfolder + filename, xml)
    indexFiles.push({ loc: 'https://' + config.baseurl + '/' + filename, lastmod: latestModified })
    latestModified = '1970-01-01'
    urls = []
  }

  let i = 0
  for (const doc of docs) {
    i++

    // Track the newest modification date within the current file; it becomes
    // that file's lastmod in the index.
    if (doc.modified > latestModified) {
      latestModified = doc.modified
    }

    console.log('[' + i + '/' + total + '] processing title[' + doc.pid + '] latestModified[' + latestModified + ']')
    urls.push({ loc: 'https://' + config.baseurl + '/' + doc.pid, lastmod: doc.modified })

    if (urls.length > 10000) {
      writeSitemapFile()
    }
  }

  // rest: also write a final chunk that holds just one url
  if (urls.length > 0) {
    writeSitemapFile()
  }

  const idxXml = getIndexfileXml(indexFiles)
  console.log('creating sitemap index')
  fs.writeFileSync(config.sitemapfolder + 'sitemap.xml', idxXml)
}
108+
109+
// Script entry point: generate the sitemap files and report the outcome.
console.log('started')
createSitemap()
  .then(() => {
    console.log('finished')
  })
  .catch((error) => {
    // Without a handler a rejection (e.g. a failed file write) would only
    // surface as an unhandled promise rejection; log it and signal failure
    // through the exit code instead.
    console.error(error)
    process.exitCode = 1
  })

package-lock.json

Lines changed: 53 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"name": "phaidra-ir-utils",
3+
"version": "0.0.1",
4+
"description": "support tools for phaidra-ir",
5+
"main": "index.js",
6+
"module": "main.js",
7+
"scripts": {
8+
"start": "node -r esm main.js",
9+
"test": "echo \"Error: no test specified\" && exit 1"
10+
},
11+
"repository": {
12+
"type": "git",
13+
"url": "git@github.com:phaidra/phaidra-ir-utils.git"
14+
},
15+
"author": "",
16+
"license": "Apache-2.0",
17+
"dependencies": {
18+
"axios": "^0.19.0",
19+
"esm": "^3.2.22",
20+
"qs": "^6.9.1"
21+
}
22+
}

phaidra-ir.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/etc/phaidra/phaidra-ir.js

0 commit comments

Comments
 (0)