Skip to content

Commit

Permalink
Update check-links and run after uplift
Browse files Browse the repository at this point in the history
  • Loading branch information
avillar committed Aug 12, 2024
1 parent bcc56aa commit 6c13018
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
name: Check URLs are resolvable

on: [workflow_dispatch, push]
on:
workflow_dispatch:
workflow_call:

jobs:
check-urls:
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/semantic-uplift.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ jobs:
uses: EndBug/add-and-commit@v9
with:
add: '**/*.ttl'

check-links:
  name: Check links
  # Local reusable workflows (triggered via `workflow_call`) must be referenced
  # from the .github/workflows directory.
  # NOTE(review): confirm the file name of the link-check workflow.
  uses: ./.github/workflows/check-links.yml
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
jupyterlab
ogc-na
ogc-na
pyyaml
requests
32 changes: 18 additions & 14 deletions scripts/check_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,48 @@
import yaml
import sys


def load_yaml(path):
    """Parse the YAML file at *path* and return the resulting object.

    The file is read as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.
    """
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)



def check_url(url):
    """Check whether *url* can be resolved.

    A HEAD request is tried first. If it fails or returns a non-OK status
    (some servers reject HEAD), a GET request is made instead.

    Returns a ``(resolvable, info)`` tuple. ``resolvable`` is the ``ok`` flag
    of the response; ``info`` is the HTTP status code, or the exception
    raised by the GET request when no response could be obtained.
    """
    user_agent = 'Mozilla/20.0.1 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    try:
        # The context manager closes the response and releases the connection
        with requests.head(url, headers=headers, stream=True, timeout=1) as r:
            if r.ok:
                return True, r.status_code
    except Exception:
        # Best effort: any HEAD failure falls through to the GET attempt
        pass
    try:
        # Use stream = True so that the request does not actually download any data
        with requests.get(url, headers=headers, stream=True, timeout=2) as r:
            return r.ok, r.status_code
    except Exception as e:
        return False, e



def check_urls_on_keys(file, yaml_list, keys):
    """Yield an issue tuple for every unresolvable URL found under *keys*.

    *yaml_list* maps entry names to mappings; for each entry, the values
    stored under each of *keys* are checked with ``check_url``. Progress is
    printed to stdout as each URL is checked.

    Yields ``(file, name, key, url, info)`` for each URL that did not resolve,
    where ``info`` is the second element returned by ``check_url``.
    """
    for name, entry in (yaml_list or {}).items():
        if not entry:
            # Entry declared without any content in the YAML file
            continue
        for key in keys:
            urls = entry.get(key) or []
            if isinstance(urls, str):
                # A single URL given as a scalar instead of a list; without
                # this it would be iterated character by character
                urls = [urls]
            for url in urls:
                print(f"Checking {file} {name}.{key} {url}", end="")
                resolvable, info = check_url(url)
                print(f" {info}")
                if not resolvable:
                    yield (file, name, key, url, info)


# Load the definitions whose URLs are to be verified
properties = load_yaml("generic/properties.yml")["properties"]
sensors = load_yaml("generic/sensors.yml")["sensors"]

# Collect every unresolvable URL; each check receives the data loaded from
# the file it is reported against
issues = [
    *check_urls_on_keys("generic/properties.yml", properties, ["sameAs", "seeAlso"]),
    *check_urls_on_keys("generic/sensors.yml", sensors, ["references"]),
]

if issues:
    # Report all failures on stderr and exit with a non-zero status
    for file, name, key, url, info in issues:
        print(f"{file} {name}.{key} {url} {info}", file=sys.stderr)
    sys.exit(112)

0 comments on commit 6c13018

Please sign in to comment.