Build and host algorithm images on GitHub (base images) #20

Merged · 30 commits · Nov 20, 2023

Commits (30)
88f4490
wip: first version of PR test workflow
CodeLionX Nov 15, 2023
6653d5f
feat: adapt first algorithm to new infrastructure
CodeLionX Nov 15, 2023
221fd40
wip
CodeLionX Nov 15, 2023
97cd8cf
fix: job generation script
CodeLionX Nov 15, 2023
51ed06b
feat: adapt second algorithm to new infrastructure
CodeLionX Nov 15, 2023
7a6358d
fix: python version
CodeLionX Nov 15, 2023
74b50e4
wip
CodeLionX Nov 15, 2023
cdd0660
wip
CodeLionX Nov 15, 2023
45cce51
wip
CodeLionX Nov 15, 2023
9ae03b8
feat: allow matrix generation script to run in different contexts (fo…
CodeLionX Nov 16, 2023
6387ae6
refactor: split up base images and intermediate images; rename folders
CodeLionX Nov 16, 2023
cd5424b
feat: adjust workflow to build images in order
CodeLionX Nov 16, 2023
1c13e8b
fix: build matrix computation script
CodeLionX Nov 16, 2023
3e9af8f
wip
CodeLionX Nov 16, 2023
b8e4d13
wip
CodeLionX Nov 16, 2023
e4b727c
test empty matrix
CodeLionX Nov 16, 2023
c840567
test empty matrix
CodeLionX Nov 16, 2023
182c762
wip
CodeLionX Nov 16, 2023
672dc1f
wip
CodeLionX Nov 16, 2023
77107e4
feat: prepare image publishing and adapt docker images
CodeLionX Nov 17, 2023
9c9089c
wip
CodeLionX Nov 17, 2023
d0b584a
chore: restore lof and sublof algorithms and use kmeans; also add lic…
CodeLionX Nov 17, 2023
e0199f6
wip
CodeLionX Nov 17, 2023
299fa53
feat: adjust r base image
CodeLionX Nov 20, 2023
7903e27
chore: cleanup workflow definition
CodeLionX Nov 20, 2023
34d2b91
refactor: revert changes to intermediate images and algos (later PR)
CodeLionX Nov 20, 2023
3acf0db
feat: test image push
CodeLionX Nov 20, 2023
5c37906
feat: test image push again
CodeLionX Nov 20, 2023
b4df7eb
fix: image license information
CodeLionX Nov 20, 2023
d26d6a1
feat: fix version information in image labels and finish PR
CodeLionX Nov 20, 2023
Files changed
96 changes: 96 additions & 0 deletions .ci/check_output.py
@@ -0,0 +1,96 @@
#!/usr/bin/env python3
import json
import sys

from pathlib import Path

MODEL_FILEPATH = Path("./results/model.pkl")
SCORES_FILEPATH = Path("./results/scores.csv")


def parse_manifest(algorithm: str) -> dict:
manifest_path = Path(".") / algorithm / "manifest.json"
with manifest_path.open("r") as fh:
manifest = json.load(fh)
return manifest


def is_readable(filename: Path) -> bool:
    # TimeEval base images run algorithms as user 1000:1000; output files must be
    # owned by that UID/GID, otherwise they were written by the wrong user
    stat = filename.stat()
    return stat.st_uid == 1000 and stat.st_gid == 1000


def has_postprocessing(algorithm: str) -> bool:
readme_path = Path(".") / algorithm / "README.md"
if not readme_path.exists():
return False

with readme_path.open("r") as fh:
readme = fh.readlines()

    # an algorithm opts into post-processing by wrapping a code snippet in its
    # README between these two markers
    marker = ["<!--BEGIN:timeeval-post-->", "<!--END:timeeval-post-->"]
    return any(m in line for m in marker for line in readme)


def main(algorithm):
manifest = parse_manifest(algorithm)
errors = []

if manifest["learningType"].lower() in ["supervised", "semi-supervised"]:
# check model.pkl
if not is_readable(MODEL_FILEPATH):
errors.append("Model file was written with the wrong user and/or group. Do you use a TimeEval base image?")

# check scores.csv
if not is_readable(SCORES_FILEPATH):
errors.append("Scoring was written with the wrong user and/or group. Do you use a TimeEval base image?")

with SCORES_FILEPATH.open("r") as fh:
lines = fh.readlines()


# if not post-processing, check length
if has_postprocessing(algorithm):
print("Skipping scoring (scores.csv) check, because algorithm uses post-processing!")
else:
# only a single column/dimension:
if any(["," in l for l in lines]):
errors.append("Scoring contains multiple dimensions (found a ',' in the file). "
"Only a single anomaly score is allowed per time step!")

# there should be no header
try:
float(lines[0])
except ValueError as e:
errors.append(f"No header allowed for the scoring file! First value is not a number! {e}")

# same length as dataset
if manifest["inputDimensionality"].lower() == "univariate":
data_path = Path("./data/dataset.csv")
else:
data_path = Path("./data/multi-dataset.csv")

n_data = 0
with data_path.open("r") as fh:
for _ in fh:
n_data += 1
        # subtract header
n_data -= 1

if len(lines) != n_data:
errors.append("Scoring has wrong length; each input time step needs an anomaly score "
f"(expected={n_data}, found={len(lines)})!")

for error in errors:
print(error, file=sys.stderr)

if len(errors) > 0:
exit(1)


if __name__ == "__main__":
args = sys.argv
if len(args) != 2:
raise ValueError("You have to spacify an algorithm name (directory / docker image name)!")

main(args[1])
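For context, check_output.py is meant to be called from the repository root after an algorithm container has written its results into ./results/. A minimal invocation might look like the following sketch; the "kmeans" algorithm name is taken from the commit messages above, and the preceding docker run step is an assumption about how the CI wires things together, not part of this PR:

# run the algorithm image first (assumed step), then validate its output files
# docker run --rm -v "$PWD/data:/data" -v "$PWD/results:/results" kmeans:latest ...
python3 .ci/check_output.py kmeans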
58 changes: 58 additions & 0 deletions .ci/generate-build-matrix.sh
@@ -0,0 +1,58 @@
#!/usr/bin/env bash

set -e

default_branch=main
folder="${1:-.}"
ignore_pattern="0-base-images|1-intermediate-images|2-scripts|data|results|Dockerfile|README.md|\..*|.*\.py|.*\.yml|.*\.sh|.*\.png"
changes_in_basedir=""

function echoerr () {
echo "$@" >&2
}

# GITHUB_EVENT_NAME=pull_request
# GITHUB_BASE_REF=PR target branch (probably default branch)
# GITHUB_HEAD_REF=PR source branch
# GITHUB_REF=refs/pull/<pr_number>/merge
# GITHUB_REF_TYPE=tag or branch
# GITHUB_REF_NAME=short name of the branch or tag that triggered the run
# RUNNER_ARCH=X86, X64, ARM, or ARM64
# RUNNER_OS=Linux, Windows, or macOS

# if this is a workflow for a PR targeting the default branch
if [[ "$GITHUB_EVENT_NAME" == "pull_request" ]] && [[ "$GITHUB_BASE_REF" == "$default_branch" ]]; then
# build diff to main
echoerr "Detected pipeline for a non-default branch (assuming pull request with target $GITHUB_BASE_REF)"
git fetch origin || echoerr "Could not update remote 'origin'! Repository might be out of date."
changes_in_basedir=$( git diff --name-only "refs/remotes/origin/$GITHUB_BASE_REF..HEAD" -- "$folder" | sed "s#${folder//\./\\.}/##" | cut -d '/' -f 1 )
#changes_in_basedir=$( git diff --name-only "$GITHUB_BASE_REF..HEAD" | cut -d '/' -f 1 )

# if this is a workflow for the default branch
elif [[ "$GITHUB_EVENT_NAME" == "push" ]] && [[ "$GITHUB_BASE_REF" == "$default_branch" ]]; then
# build latest commit for the default branch
echoerr "Detected pipeline for default branch"
#changes_in_basedir=$( git diff --name-only "$CI_COMMIT_BEFORE_SHA..$CI_COMMIT_SHA" )
changes_in_basedir=$( git diff --name-only HEAD~1..HEAD -- "$folder" | sed "s#${folder//\./\\.}/##" | cut -d '/' -f 1 )

# if this is a tag-workflow: build all algorithm images
elif [[ "$GITHUB_EVENT_NAME" == "push" ]] && [[ "$GITHUB_REF_TYPE" == "tag" ]]; then
echoerr "Detected pipeline for a tag"
changes_in_basedir=$( ls -1 )

else
echoerr "Cannot determine algorithm images to build! Please check the environment variables:"
env | grep "GITHUB" >&2 && true
echoerr ""
fi

# filter changes: remove non-algorithm-files/-folders and allow grep to find nothing (exit code 1)
changed_algos=$( echo "$changes_in_basedir" | sort | uniq | grep -x -v -E "${ignore_pattern}" || [[ $? == 1 ]] )
# filter changes: remove non-existing algos (e.g. when branch is not up-to-date with default branch or an algorithm was removed)
changed_algos=$( echo "$changed_algos" | while read -r f; do [[ -d "$folder/$f" ]] && echo "$f" || true; done )

if [[ -z "$changed_algos" ]]; then
echoerr "No algorithm changed!"
fi

echoerr "Generating pipeline for algorithms: $(xargs <<<$changed_algos)"
(jq -Rc '[.]' | jq -sc '{"algorithm_name": add}') <<<"${changed_algos}"
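The script writes its diagnostics to stderr and prints a single JSON object of the form {"algorithm_name": [...]} to stdout, which the workflow can feed into a build matrix. A local dry run could look like this sketch; the environment variable values only mimic what GitHub Actions would set for a pull request against main:

# simulate a pull-request run against the default branch (illustrative values)
export GITHUB_EVENT_NAME=pull_request
export GITHUB_BASE_REF=main
bash .ci/generate-build-matrix.sh . > matrix.json
jq '.algorithm_name' matrix.json   # folders of the algorithms that changed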
31 changes: 31 additions & 0 deletions .ci/get-image-version.sh
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

set -e

folder="${1:-}"
SEMVER_REGEX="^(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\-[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?(\\+[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?$"

trim-and-validate() {
local var="$*"
# remove leading whitespace characters
var="${var#"${var%%[![:space:]]*}"}"
# remove trailing whitespace characters
var="${var%"${var##*[![:space:]]}"}"

# validate semver version string
if [[ "$var" =~ $SEMVER_REGEX ]]; then
printf '%s' "$var"
else
echo "Version $var is not a proper version string according to SemVer 'X.Y.Z(-PRERELEASE)(+BUILD)'!" >&2
exit 1
fi
}

if [[ -f "$folder/version.txt" ]]; then
trim-and-validate "$( cat "$folder/version.txt" )"
elif [[ -f "$folder/manifest.json" ]]; then
trim-and-validate "$( jq -r '.version' "$folder/manifest.json" )"
else
echo "No version.txt or manifest.json present. Cannot determine Docker image version!" >&2
exit 1
fi
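get-image-version.sh prints the validated SemVer string for a single algorithm folder so that the workflow can use it as the Docker image tag. A usage sketch; the algorithm name is a placeholder and the registry/owner are assumptions, not values taken from this PR:

# read the version from <algorithm>/version.txt or, as a fallback, <algorithm>/manifest.json
version="$(bash .ci/get-image-version.sh kmeans)"
# hypothetical tagging step; the actual registry and naming scheme live in the workflow definition
docker tag kmeans:latest "ghcr.io/${OWNER}/kmeans:${version}"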
23 changes: 23 additions & 0 deletions .ci/get_dataset_name.py
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
import json
import sys

from pathlib import Path

if __name__ == "__main__":
args = sys.argv
if len(args) != 2:
raise ValueError("You have to specify an algorithm name (directory / docker image name)!")

algorithm = args[1]
manifest_path = Path(".") / algorithm / "manifest.json"
with manifest_path.open("r") as fh:
manifest = json.load(fh)

value = manifest["inputDimensionality"]
if value.lower() == "univariate":
print("data/dataset.csv")
elif value.lower() == "multivariate":
print("data/multi-dataset.csv")
else:
raise ValueError(f"Input dimensionality ({value}) of {algorithm}'s manifest is unknown!")