Wikipedia Data Store crawls Wikipedia pages from a dump file.
See the Maven Repository for the published artifacts.
See the Plugin section of the Administration Guide.
# Parameter
url=http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2
limit=10000
# Script
lang="ja"
filetype=format
filename=title
url="https://ja.wikipedia.org/wiki/" + encodedTitle
host="ja.wikipedia.org"
site="ja.wikipedia.org"
title=title
content=content
digest=digest
anchor=
content_length=content.length()
last_modified=timestamp
timestamp=timestamp