Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add/cleanup #331

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
129 changes: 129 additions & 0 deletions .github/scripts/clean-database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/env python3

import json
import os
import fnmatch
import requests
import shutil
import sys
import time


def recursive_find(base, pattern="*.json"):
    """recursive find will yield files matching a pattern in all
    directory levels below a base path.

    Arguments:
     - base (str) : the base directory to search
     - pattern: an fnmatch-style pattern matched against file names
                (not full paths), defaults to *.json

    Yields the path of each match, joined onto the directory in which it
    was found. A base that does not exist yields nothing.
    """
    for root, _, filenames in os.walk(base):
        for filename in fnmatch.filter(filenames, pattern):
            yield os.path.join(root, filename)


def read_json(path):
    """
    Read json into dict!

    Arguments:
     - path (str) : the json file to read

    Returns the decoded content. The file is read as UTF-8 so the result
    does not depend on the platform's default encoding.
    """
    with open(path, "r", encoding="utf-8") as fd:
        return json.load(fd)


def save_json(meta, path):
    """
    Write meta to path as json indented by four spaces.

    Arguments:
     - meta : the json-serializable content to save
     - path (str) : the file to write; existing content is replaced
    """
    with open(path, "w", encoding="utf-8") as fd:
        json.dump(meta, fd, indent=4)


def clean(repos_file, database, archive):
    """
    Clean the database

    Every *metadata.json under the database is read and a uid is derived
    from its url. Entries whose uid is listed in repos_file get a HEAD
    request against their html_url (or url):

     - 200: nothing to do
     - 404: the entry directory is moved under the archive and its uid is
            dropped from the repos listing
     - 301 / 302: the redirect target is saved as the new url

    For any non-200 entry that was not archived, a directory that does
    not match the derived uid is moved to database/<uid>. The listing is
    finally written back to repos.txt in the current working directory.

    Arguments:
     - repos_file (str) : text file with one uid per line; only these
                          entries are checked
     - database (str) : root directory holding the metadata
     - archive (str) : root directory that receives entries that are gone
    """
    # The subset restricts which entries are checked on this run
    with open(repos_file, "r") as fd:
        subset = {line.strip() for line in fd if line.strip()}

    # The full listing is what gets updated and saved at the end
    with open("repos.txt", "r") as fd:
        repos = {line.strip() for line in fd if line.strip()}

    for path in recursive_find(database, "*metadata.json"):
        meta = read_json(path)
        relpath = os.path.relpath(path, database)

        # Spurious bug with empty url: fall back to the url of the data
        # section before deriving the uid, which needs a string
        url = meta["url"]
        url_was_missing = url is None
        if url_was_missing:
            url = meta["data"]["url"]

        # Ensure UID is correct. It is built from the last three url
        # components, https://github.com/<org>/<name> -> github/<org>/<name>
        uid = "/".join(url.rsplit("/", 3)[1:]).replace(".com", "")

        # Only look at subset
        if uid not in subset:
            continue

        # Persist the repaired url only for entries we actually process
        if url_was_missing:
            meta["url"] = url
            save_json(meta, path)

        try:
            res = requests.head(meta.get("html_url") or meta.get("url"))
        except requests.exceptions.RequestException as err:
            # Best effort: one unreachable entry should not stop the run
            print(f"Issue with {path}: {err}")
            continue

        if res.status_code == 200:
            continue

        if res.status_code == 404:
            print(f"Found repository no longer present at {relpath}, archiving")
            newdir = os.path.dirname(os.path.join(archive, relpath))
            shutil.move(os.path.dirname(path), newdir)
            repos.discard(os.path.dirname(relpath))
            # The directory has left the database, nothing to relocate
            continue

        if res.status_code in (301, 302):
            print(f"Found repository {relpath} with moved location, updating")
            new_location = requests.head(res.headers["Location"])
            meta["url"] = new_location.url
            save_json(meta, path)

        # NOTE(review): the indentation of this check was not recoverable
        # from the reviewed source; it is assumed to apply to every entry
        # that reaches this point, not only to redirects - confirm.
        if uid not in relpath:
            old_uid = os.path.dirname(relpath)
            shutil.move(os.path.dirname(path), os.path.join(database, uid))
            print(f"{relpath} should be {uid}")
            repos.discard(old_uid)
            repos.add(uid)

    # Save back to file
    # This largely isn't needed because we re-generate with export
    # But might as well try to maintain consistency here.
    # Sorted so that the file does not change order between runs.
    with open("repos.txt", "w") as fd:
        fd.write("\n".join(sorted(repos)))


def main():
    """
    Validate the command line arguments and run the clean.

    Expects two arguments: a root path that contains the "database" and
    "archive" directories, and a text file of repos to check. Exits with
    a message when an argument is missing or either directory does not
    exist.
    """
    # python .github/scripts/clean-database.py $(pwd) repos-subset.txt
    if len(sys.argv) < 3:
        sys.exit(
            "Please provide a root path (with the database and archive) and a text file of repos."
        )

    root = os.path.abspath(sys.argv[1])
    repos_file = os.path.abspath(sys.argv[2])
    database = os.path.join(root, "database")
    archive = os.path.join(root, "archive")
    for path in database, archive:
        if not os.path.exists(path):
            sys.exit(f"{path} does not exist.")

    clean(repos_file, database, archive)


if __name__ == "__main__":
    main()
83 changes: 83 additions & 0 deletions .github/workflows/clean.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Cleans a calendar-selected subset of the software database, re-exports
# the static site to docs/, and pushes the result to a dated branch from
# which a pull request is opened.
name: clean-software

on:
workflow_dispatch:
pull_request: []
schedule:
# Weekly (Sundays at 07:00; GitHub evaluates cron schedules in UTC)
- cron: 0 7 * * 0

jobs:
clean:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v3
- name: Install Research Software Encyclopedia
run: |
sudo apt-get update && sudo apt-get install -y python3 python3-pip python3-setuptools
git clone https://github.com/rseng/rse /tmp/rse
cd /tmp/rse
sudo pip3 install .[all]
rse --version

# Run a specific letter matched to a day of the month A==1, Z==26
- name: Calendar Updater
uses: vsoch/split-list-action@main
with:
ids_file: repos.txt
outfile: repos-subset.txt

# One letter assigned to each day
calendar_split: true

# Check only the entries listed in repos-subset.txt with the clean script,
# then re-export repos.txt with rse.
- name: Clean Repos Subset
env:
RSE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
python .github/scripts/clean-database.py $(pwd) repos-subset.txt
rse export repos.txt --force

- name: Export site to docs/
uses: rseng/[email protected]
with:
config: rse.ini
export_dir: docs
force: true
prefix: software
host: https://rseng.github.io

# NOTE(review): this step and "Open Pull Request" are gated on
# pull_request events, so scheduled and manually dispatched runs never
# push a branch or open a pull request - confirm that is intended.
# NOTE(review): only database/ and docs/ are staged below; directories the
# clean script moves into archive/ and changes to repos.txt are not -
# confirm whether they should be committed as well.
- name: Checkout New Branch
if: (github.event_name == 'pull_request')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BRANCH_AGAINST: "master"
run: |
printf "GitHub Actor: ${GITHUB_ACTOR}\n"
export BRANCH_FROM="update/software-$(date '+%Y-%m-%d')"
git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
git branch
git checkout -b "${BRANCH_FROM}" || git checkout "${BRANCH_FROM}"
git branch

git config --global user.name "github-actions"
git config --global user.email "[email protected]"

git add database/*
git add docs/*

if git diff-index --quiet HEAD --; then
printf "No changes\n"
else
printf "Changes\n"
git commit -m "Automated deployment to update software database $(date '+%Y-%m-%d')"
git push origin "${BRANCH_FROM}"
fi
echo "PULL_REQUEST_FROM_BRANCH=${BRANCH_FROM}" >> $GITHUB_ENV

- name: Open Pull Request
if: (github.event_name == 'pull_request')
uses: vsoch/pull-request-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PULL_REQUEST_BRANCH: "master"
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,11 @@ rse export --type static-web docs/
rse export --type static-web docs/ --force # if exists
```

Further, a static API is exported to [https://rseng.github.io/software/data.json](https://rseng.github.io/software/data.json) that provides a listing of your software repositories
for some programmatic usage.
Further, a static API is exported to [https://rseng.github.io/software/data.json](https://rseng.github.io/software/data.json) that provides a listing of your software repositories for some programmatic usage. We also clean (and [archive](archive)) repositories that are no longer found:

```bash
$ python .github/scripts/clean-database.py $(pwd) repos.txt
```

# Development Work

Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/3D-e-Chem-VM/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/3D-e-Chem-VM",
"url": "https://api.github.com/repos/3D-e-Chem/3D-e-Chem-VM",
"url": "https://github.com/3D-e-Chem/3D-e-Chem-VM",
"data": {
"timestamp": "2020-07-05 00:17:07.943003",
"name": "3D-e-Chem-VM",
"url": "https://api.github.com/repos/3D-e-Chem/3D-e-Chem-VM",
"url": "https://github.com/3D-e-Chem/3D-e-Chem-VM",
"full_name": "3D-e-Chem/3D-e-Chem-VM",
"html_url": "https://github.com/3D-e-Chem/3D-e-Chem-VM",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.594559",
"name": "3D-e-Chem-VM",
"full_name": "3D-e-Chem/3D-e-Chem-VM",
"html_url": "https://github.com/3D-e-Chem/3D-e-Chem-VM",
"private": false,
"description": "Virtual machine with all software and sample data to run 3D-e-Chem Knime workflows",
"created_at": "2016-01-11T21:10:05Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-gpcrdb/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-gpcrdb",
"url": "https://api.github.com/repos/3D-e-Chem/knime-gpcrdb",
"url": "https://github.com/3D-e-Chem/knime-gpcrdb",
"data": {
"timestamp": "2020-07-05 00:17:14.880978",
"name": "knime-gpcrdb",
"url": "https://api.github.com/repos/3D-e-Chem/knime-gpcrdb",
"url": "https://github.com/3D-e-Chem/knime-gpcrdb",
"full_name": "3D-e-Chem/knime-gpcrdb",
"html_url": "https://github.com/3D-e-Chem/knime-gpcrdb",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.597174",
"name": "knime-gpcrdb",
"full_name": "3D-e-Chem/knime-gpcrdb",
"html_url": "https://github.com/3D-e-Chem/knime-gpcrdb",
"private": false,
"description": "GPCRDB nodes for Knime",
"created_at": "2016-01-12T13:31:10Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-klifs/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-klifs",
"url": "https://api.github.com/repos/3D-e-Chem/knime-klifs",
"url": "https://github.com/3D-e-Chem/knime-klifs",
"data": {
"timestamp": "2020-07-05 00:17:15.027377",
"name": "knime-klifs",
"url": "https://api.github.com/repos/3D-e-Chem/knime-klifs",
"url": "https://github.com/3D-e-Chem/knime-klifs",
"full_name": "3D-e-Chem/knime-klifs",
"html_url": "https://github.com/3D-e-Chem/knime-klifs",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.595324",
"name": "knime-klifs",
"full_name": "3D-e-Chem/knime-klifs",
"html_url": "https://github.com/3D-e-Chem/knime-klifs",
"private": false,
"description": "Knime nodes to interact with KLIFS",
"created_at": "2016-05-11T11:00:29Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-kripodb/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-kripodb",
"url": "https://api.github.com/repos/3D-e-Chem/knime-kripodb",
"url": "https://github.com/3D-e-Chem/knime-kripodb",
"data": {
"timestamp": "2020-07-05 00:17:06.490235",
"name": "knime-kripodb",
"url": "https://api.github.com/repos/3D-e-Chem/knime-kripodb",
"url": "https://github.com/3D-e-Chem/knime-kripodb",
"full_name": "3D-e-Chem/knime-kripodb",
"html_url": "https://github.com/3D-e-Chem/knime-kripodb",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.597262",
"name": "knime-kripodb",
"full_name": "3D-e-Chem/knime-kripodb",
"html_url": "https://github.com/3D-e-Chem/knime-kripodb",
"private": false,
"description": "Knime node for KripoDB package",
"created_at": "2016-02-12T11:12:39Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-molviewer/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-molviewer",
"url": "https://api.github.com/repos/3D-e-Chem/knime-molviewer",
"url": "https://github.com/3D-e-Chem/knime-molviewer",
"data": {
"timestamp": "2020-07-05 00:17:15.191079",
"name": "knime-molviewer",
"url": "https://api.github.com/repos/3D-e-Chem/knime-molviewer",
"url": "https://github.com/3D-e-Chem/knime-molviewer",
"full_name": "3D-e-Chem/knime-molviewer",
"html_url": "https://github.com/3D-e-Chem/knime-molviewer",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.597231",
"name": "knime-molviewer",
"full_name": "3D-e-Chem/knime-molviewer",
"html_url": "https://github.com/3D-e-Chem/knime-molviewer",
"private": false,
"description": "KNIME node which launches a web browser with moleculer viewer powered by webgl",
"created_at": "2016-08-12T19:13:57Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-pharmacophore/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-pharmacophore",
"url": "https://api.github.com/repos/3D-e-Chem/knime-pharmacophore",
"url": "https://github.com/3D-e-Chem/knime-pharmacophore",
"data": {
"timestamp": "2020-07-05 00:17:06.675589",
"name": "knime-pharmacophore",
"url": "https://api.github.com/repos/3D-e-Chem/knime-pharmacophore",
"url": "https://github.com/3D-e-Chem/knime-pharmacophore",
"full_name": "3D-e-Chem/knime-pharmacophore",
"html_url": "https://github.com/3D-e-Chem/knime-pharmacophore",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.997332",
"name": "knime-pharmacophore",
"full_name": "3D-e-Chem/knime-pharmacophore",
"html_url": "https://github.com/3D-e-Chem/knime-pharmacophore",
"private": false,
"description": "KNIME nodes to read/write pharmacophore files",
"created_at": "2017-06-23T15:10:14Z",
Expand Down
5 changes: 2 additions & 3 deletions database/github/3D-e-Chem/knime-plants/metadata.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"parser": "github",
"uid": "github/3D-e-Chem/knime-plants",
"url": "https://api.github.com/repos/3D-e-Chem/knime-plants",
"url": "https://github.com/3D-e-Chem/knime-plants",
"data": {
"timestamp": "2020-07-05 00:17:06.840344",
"name": "knime-plants",
"url": "https://api.github.com/repos/3D-e-Chem/knime-plants",
"url": "https://github.com/3D-e-Chem/knime-plants",
"full_name": "3D-e-Chem/knime-plants",
"html_url": "https://github.com/3D-e-Chem/knime-plants",
"private": false,
Expand Down Expand Up @@ -37,7 +37,6 @@
"doi": "10.5281/zenodo.997272",
"name": "knime-plants",
"full_name": "3D-e-Chem/knime-plants",
"html_url": "https://github.com/3D-e-Chem/knime-plants",
"private": false,
"description": "KNIME nodes to configure, run and analyze PLANTS protein-ligand docking",
"created_at": "2016-11-29T09:55:05Z",
Expand Down
Loading