diff --git a/demo/demo.sh b/demo/demo.sh index 54aa86d..d6340ab 100755 --- a/demo/demo.sh +++ b/demo/demo.sh @@ -15,4 +15,4 @@ clear pe "# For the sake of the asciicast: PAT is present in RUNNER_PAT environment variable" pe "# -r 1: to run once only, the default is to run forever" -pe "./orchestrator.sh -v -r 1 -p efrecon/gh-runner-krunvm -- 1" +pe "./orchestrator.sh -v -- -r 1 -p efrecon/gh-runner-krunvm" diff --git a/lib/common.sh b/lib/common.sh index feaa8f6..ded9f90 100644 --- a/lib/common.sh +++ b/lib/common.sh @@ -75,6 +75,57 @@ run_krunvm() { buildah unshare krunvm "$@" } +tac() { + awk '{ buffer[NR] = $0; } END { for(i=NR; i>0; i--) { print buffer[i] } }' +} + +# This is the same as pgrep -P, but using ps and awk. The option (and keywords) +# used also exist on macOS, so this implementation should be cross platform. +pgrep_P() { + ps -A -o pid,ppid | awk -v pid="$1" '$2 == pid { print $1 }' +} + +ps_tree() { + if [ -n "$1" ]; then + printf %s\\n "$1" + for _pid in $(pgrep_P "$1"); do + ps_tree "$_pid" + done + fi +} + +running() { + # Construct a comma separated list of pids to wait for + _pidlist= + for _pid; do + _pidlist="${_pidlist},${_pid}" + done + + # Construct the list of those pids that are still running + ps -p "${_pidlist#,}" -o pid= 2>/dev/null | awk '{ print $1 }' +} + +waitpid() { + # Construct the list of those pids that are still running + _running=$(running "$@") + + # If not empty, sleep and try again with the list of running pids (so we avoid + # having the same PID that would reappear -- very unlikely) + if [ -n "$_running" ]; then + sleep 1 + # shellcheck disable=SC2086 # We want to expand the list of pids + waitpid $_running + fi +} + +kill_tree() { + verbose "Killing process tree for $1" + for pid in $(ps_tree "$1"|tac); do + debug "Killing process $pid" + kill -s "${2:-TERM}" -- "$pid" 2>/dev/null + done +} + find_pattern() { _type=$(to_lower "${2:-f}") diff --git a/orchestrator.sh b/orchestrator.sh index aac0706..10e69df 100755 --- a/orchestrator.sh +++ b/orchestrator.sh @@ -89,7 +89,15 @@ KRUNVM_RUNNER_LOG=$ORCHESTRATOR_LOG KRUNVM_RUNNER_VERBOSE=$ORCHESTRATOR_VERBOSE cleanup() { - trap - INT TERM EXIT + trap '' EXIT + + for pid in $ORCHESTRATOR_PIDS; do + verbose "Killing runner loop $pid" + kill "$pid" + done + verbose "Waiting for runners to die" + # shellcheck disable=SC2086 # We want to wait for all pids + waitpid $ORCHESTRATOR_PIDS if run_krunvm list | grep -qE "^${ORCHESTRATOR_PREFIX}-"; then while IFS= read -r vm; do @@ -118,7 +126,8 @@ if [ "$ORCHESTRATOR_ISOLATION" = 1 ]; then else verbose "Creating $ORCHESTRATOR_RUNNERS insecure runner loops" fi -trap cleanup INT TERM EXIT + +trap cleanup EXIT # Pass essential variables, verbosity and log configuration to main runner # script. @@ -159,7 +168,6 @@ for i in $(seq 1 "$ORCHESTRATOR_RUNNERS"); do fi done -verbose "Waiting for runners to die" -for pid in $ORCHESTRATOR_PIDS; do - wait "$pid" -done +# shellcheck disable=SC2086 # We want to wait for all pids +waitpid $ORCHESTRATOR_PIDS +cleanup diff --git a/runner.sh b/runner.sh index d7e669a..23f2fde 100755 --- a/runner.sh +++ b/runner.sh @@ -223,7 +223,6 @@ $(printf %s\\n "$RUNNER_MOUNT") EOF fi run_krunvm create "$RUNNER_IMAGE" "$@" - } @@ -263,7 +262,14 @@ EOF done fi verbose "Starting microVM '${RUNNER_PREFIX}-$_id' with entrypoint $RUNNER_ENTRYPOINT" - run_krunvm start "${RUNNER_PREFIX}-$_id" "$RUNNER_ENTRYPOINT" -- "$@" + optstate=$(set +o) + set -m; # Disable job control + run_krunvm start "${RUNNER_PREFIX}-$_id" "$RUNNER_ENTRYPOINT" -- "$@" "${RUNNER_ENVIRONMENT}/${RUNNER_ID}.trm" + elif [ -n "$RUNNER_PID" ]; then + kill_tree "$RUNNER_PID" + fi + if [ "$RUNNER_PID" ]; then + # shellcheck disable=SC2046 # We want to wait for all children + waitpid $(ps_tree "$RUNNER_PID"|tac) + else + warning "No PID to wait for" + fi + elif [ -n "$RUNNER_PID" ]; then + kill_tree "$RUNNER_PID" + # shellcheck disable=SC2046 # We want to wait for all children + waitpid $(ps_tree "$RUNNER_PID"|tac) + fi + elif [ -n "$RUNNER_PID" ]; then + kill_tree "$RUNNER_PID" + # shellcheck disable=SC2046 # We want to wait for all children + waitpid $(ps_tree "$RUNNER_PID"|tac) + fi +} + +cleanup() { + trap '' EXIT + if [ -n "$RUNNER_PID" ]; then + vm_terminate + fi + if [ -n "$RUNNER_ID" ]; then + vm_delete "$RUNNER_ID" + fi +} + +trap cleanup EXIT + iteration=0 while true; do @@ -285,6 +330,7 @@ while true; do vm_create "${RUNNER_ID}" vm_start "${RUNNER_ID}" vm_delete "${RUNNER_ID}" + RUNNER_ID= if [ "$RUNNER_REPEAT" -gt 0 ]; then iteration=$((iteration+1)) diff --git a/runner/runner.sh b/runner/runner.sh index 37c4d07..b4c313a 100755 --- a/runner/runner.sh +++ b/runner/runner.sh @@ -420,10 +420,12 @@ fi # Start the runner. verbose "Starting runner as user '$RUNNER_USER' (current user=$(id -un)): $*" +RUNNER_PID= case "$RUNNER_USER" in root) if [ "$(id -u)" = "0" ]; then - "$@" + "$@" & + RUNNER_PID="$!" else error "Cannot start runner as root from non-root user" fi @@ -433,7 +435,8 @@ case "$RUNNER_USER" in if [ "$(id -u)" = "0" ]; then verbose "Starting runner as $RUNNER_USER" chown -R "$RUNNER_USER" "$RUNNER_WORKDIR" - runas "$@" + runas "$@" & + RUNNER_PID="$!" elif [ "$(id -un)" = "$RUNNER_USER" ]; then "$@" else @@ -444,3 +447,21 @@ case "$RUNNER_USER" in fi ;; esac + +if [ -n "$RUNNER_PID" ]; then + while [ -n "$(running "$RUNNER_PID")" ]; do + if [ -n "${RUNNER_TOKENFILE:-}" ] && [ -n "${RUNNER_SECRET:-}" ]; then + if [ -f "${RUNNER_TOKENFILE%.*}.trm" ]; then + break=$(cat "${RUNNER_TOKENFILE%.*}.trm") + if [ "$break" = "$RUNNER_SECRET" ]; then + verbose "Termination file found, stopping runner" + kill "$RUNNER_PID" + runner_unregister 1 + else + warning "Termination found at ${RUNNER_TOKENFILE%.*}.trm, but it does not match the secret" + fi + fi + fi + sleep 1 + done +fi