diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9b85e4ea..920903df 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.7' + python-version: '3.8' - name: Install requirements run: pip install flake8 pycodestyle - name: Check syntax diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index c9edf421..4ea3247f 100644 --- a/ckanext/spatial/harvesters/waf.py +++ b/ckanext/spatial/harvesters/waf.py @@ -235,6 +235,16 @@ def fetch_stage(self, harvest_object): ,adjacent=False, joinString=' ').setResultsName('date') ) +nginx = parse.SkipTo(parse.CaselessLiteral("", include=True).suppress() \ + + parse.Optional(parse.Literal('')).suppress() \ + + parse.Optional(parse.Combine( + parse.Word(parse.alphanums+'-') + + parse.Word(parse.alphanums+':') + ,adjacent=False, joinString=' ').setResultsName('date') + ) + iis = parse.SkipTo("
").suppress() \ + parse.OneOrMore("
").suppress() \ + parse.Optional(parse.Combine( @@ -252,12 +262,15 @@ def fetch_stage(self, harvest_object): scrapers = {'apache': parse.OneOrMore(parse.Group(apache)), + 'nginx': parse.OneOrMore(parse.Group(nginx)), 'other': parse.OneOrMore(parse.Group(other)), 'iis': parse.OneOrMore(parse.Group(iis))} def _get_scraper(server): if not server or 'apache' in server.lower(): return 'apache' + if 'nginx' in server.lower(): + return 'nginx' if server == 'Microsoft-IIS/7.5': return 'iis' else: diff --git a/ckanext/spatial/tests/test_api.py b/ckanext/spatial/tests/test_api.py index 20d16e89..e455761a 100644 --- a/ckanext/spatial/tests/test_api.py +++ b/ckanext/spatial/tests/test_api.py @@ -56,6 +56,13 @@ def test_api(self, app): assert r.headers["Content-Type"] == "application/xml; charset=utf-8" assert r.body == '\nContent 1' + # Access human-readable view of content + url = "/harvest/object/{0}/html".format(object_id_1) + r = app.get(url, status=200) + assert( + r.headers["Content-Type"] == "text/html; charset=utf-8" + ) + # Access original content in object extra (if present) url = "/harvest/object/{0}/original".format(object_id_1) r = app.get(url, status=404) diff --git a/ckanext/spatial/util.py b/ckanext/spatial/util.py index ac4c039e..6a4f08aa 100644 --- a/ckanext/spatial/util.py +++ b/ckanext/spatial/util.py @@ -191,11 +191,11 @@ def get_harvest_object_content(id): return None -def _transform_to_html(content, xslt_package=None, xslt_path=None): +def transform_to_html(content, xslt_package=None, xslt_path=None): xslt_package = xslt_package or __name__ xslt_path = xslt_path or \ - '../templates/ckanext/spatial/gemini2-html-stylesheet.xsl' + 'templates/ckanext/spatial/gemini2-html-stylesheet.xsl' # optimise -- read transform only once and compile rather # than at each request diff --git a/requirements-py2.txt b/requirements-py2.txt index 094497ad..4b01260b 100644 --- a/requirements-py2.txt +++ b/requirements-py2.txt @@ -7,4 +7,4 @@ argparse pyparsing>=2.1.10 requests>=1.1.0 six -geojson==2.5.0 +geojson==2.5.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 565343ae..cbbadd46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ argparse pyparsing>=2.1.10 requests>=1.1.0 six -geojson==2.5.0 +geojson==2.5.0 \ No newline at end of file