Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graceful shutdown -- handle termination signals #14

Merged
merged 4 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion demo/demo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ clear

pe "# For the sake of the asciicast: PAT is present in RUNNER_PAT environment variable"
pe "# -r 1: to run once only, the default is to run forever"
pe "./orchestrator.sh -v -r 1 -p efrecon/gh-runner-krunvm -- 1"
pe "./orchestrator.sh -v -- -r 1 -p efrecon/gh-runner-krunvm"
51 changes: 51 additions & 0 deletions lib/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,57 @@ run_krunvm() {
buildah unshare krunvm "$@"
}

# Print the lines read on stdin in reverse order (last line first), using awk.
# All input is buffered in memory before anything is printed.
tac() {
  awk '
    { line[NR] = $0 }
    END {
      n = NR
      while (n > 0) {
        print line[n]
        n--
      }
    }'
}

# Portable stand-in for `pgrep -P`: print the PID of every process whose
# parent PID is $1, one per line. Only ps and awk are used; the ps options and
# keywords involved also exist on macOS, so this should be cross platform.
pgrep_P() {
  ps -A -o pid,ppid |
    awk -v parent="$1" '{ if ($2 == parent) print $1 }'
}

# Print the process tree rooted at PID $1, one PID per line: the root first,
# then each child followed by its own descendants (depth-first). Prints nothing
# when $1 is empty. The root PID is printed even if pgrep_P finds no children.
ps_tree() {
  [ -n "$1" ] || return 0

  printf '%s\n' "$1"
  # The child list is expanded once, before the loop starts, so the recursive
  # calls reusing _pid do not disturb the iteration.
  for _pid in $(pgrep_P "$1"); do
    ps_tree "$_pid"
  done
}

# Print, one per line, those of the PIDs given as arguments that ps still
# reports. Prints nothing when none of them is found (ps errors are silenced).
running() {
  # Join all arguments with commas, the list format expected by `ps -p`. The
  # IFS change is confined to the command substitution's subshell.
  _pidlist=$(IFS=,; printf %s "$*")

  # pid= requests the PID column without a header; awk strips the padding.
  ps -p "$_pidlist" -o pid= 2>/dev/null | awk '{ print $1 }'
}

# Block until none of the PIDs given as arguments is running any more, polling
# once per second. Unlike the wait builtin, this relies on `running` and is
# therefore not restricted to children of the current shell.
#
# Implemented as a loop rather than by recursion: callers may wait on
# long-lived processes, and one extra function frame per second of waiting
# would grow the call depth without bound.
waitpid() {
  # Construct the list of those pids that are still running
  _running=$(running "$@")

  # As long as it is not empty, sleep and try again with the list of running
  # pids only (so we avoid having the same PID that would reappear -- very
  # unlikely)
  while [ -n "$_running" ]; do
    sleep 1
    # shellcheck disable=SC2086 # We want to expand the list of pids
    _running=$(running $_running)
  done
}

# Send a signal to every process of the tree rooted at PID $1.
#   $1  root PID of the tree
#   $2  signal name or number passed to `kill -s` (default: TERM)
# Errors from kill (e.g. a process that is already gone) are silenced.
kill_tree() {
  verbose "Killing process tree for $1"

  # ps_tree lists the root first; reversing the list signals the deepest
  # descendants first and the root last.
  _victims=$(ps_tree "$1" | tac)
  for pid in $_victims; do
    debug "Killing process $pid"
    kill -s "${2:-TERM}" -- "$pid" 2>/dev/null
  done
}


find_pattern() {
_type=$(to_lower "${2:-f}")
Expand Down
20 changes: 14 additions & 6 deletions orchestrator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,15 @@ KRUNVM_RUNNER_LOG=$ORCHESTRATOR_LOG
KRUNVM_RUNNER_VERBOSE=$ORCHESTRATOR_VERBOSE

cleanup() {
trap - INT TERM EXIT
trap '' EXIT

for pid in $ORCHESTRATOR_PIDS; do
verbose "Killing runner loop $pid"
kill "$pid"
done
verbose "Waiting for runners to die"
# shellcheck disable=SC2086 # We want to wait for all pids
waitpid $ORCHESTRATOR_PIDS

if run_krunvm list | grep -qE "^${ORCHESTRATOR_PREFIX}-"; then
while IFS= read -r vm; do
Expand Down Expand Up @@ -118,7 +126,8 @@ if [ "$ORCHESTRATOR_ISOLATION" = 1 ]; then
else
verbose "Creating $ORCHESTRATOR_RUNNERS insecure runner loops"
fi
trap cleanup INT TERM EXIT

trap cleanup EXIT

# Pass essential variables, verbosity and log configuration to main runner
# script.
Expand Down Expand Up @@ -159,7 +168,6 @@ for i in $(seq 1 "$ORCHESTRATOR_RUNNERS"); do
fi
done

verbose "Waiting for runners to die"
for pid in $ORCHESTRATOR_PIDS; do
wait "$pid"
done
# shellcheck disable=SC2086 # We want to wait for all pids
waitpid $ORCHESTRATOR_PIDS
cleanup
50 changes: 48 additions & 2 deletions runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ $(printf %s\\n "$RUNNER_MOUNT")
EOF
fi
run_krunvm create "$RUNNER_IMAGE" "$@"

}


Expand Down Expand Up @@ -263,7 +262,14 @@ EOF
done
fi
verbose "Starting microVM '${RUNNER_PREFIX}-$_id' with entrypoint $RUNNER_ENTRYPOINT"
run_krunvm start "${RUNNER_PREFIX}-$_id" "$RUNNER_ENTRYPOINT" -- "$@"
optstate=$(set +o)
set -m; # Enable job control (monitor mode) while the microVM is launched
run_krunvm start "${RUNNER_PREFIX}-$_id" "$RUNNER_ENTRYPOINT" -- "$@" </dev/null &
RUNNER_PID=$!
eval "$optstate"; # Restore options
verbose "Started microVM '${RUNNER_PREFIX}-$_id' with PID $RUNNER_PID"
wait "$RUNNER_PID"
RUNNER_PID=
}


Expand All @@ -278,13 +284,53 @@ vm_delete() {
run_krunvm delete "${RUNNER_PREFIX}-$1"
}

# Kill the process tree rooted at $RUNNER_PID and wait for all of it to be gone.
_vm_kill_and_wait() {
  # Snapshot the tree BEFORE signalling it: once the root has died, its
  # descendants are re-parented and can no longer be found from $RUNNER_PID, so
  # a tree computed after the kill could miss processes that are still alive.
  _vm_tree=$(ps_tree "$RUNNER_PID" | tac)
  kill_tree "$RUNNER_PID"
  # shellcheck disable=SC2086 # We want to wait for all pids of the tree
  waitpid $_vm_tree
}

# Terminate the microVM started by this runner loop and wait for its processes.
#
# Globals read: RUNNER_ENVIRONMENT, RUNNER_ID, RUNNER_SECRET, RUNNER_PID.
#
# - When the token file ${RUNNER_ENVIRONMENT}/${RUNNER_ID}.tkn exists and
#   RUNNER_SECRET is set, termination is requested by writing the secret to
#   the matching .trm file; the process tree is then waited for, not killed.
# - When the token file exists but no secret is set, the process tree is killed
#   and waited for.
# - In both token-file cases an empty RUNNER_PID only triggers a warning.
# - Without environment directory or token file, the process tree is killed
#   and waited for when RUNNER_PID is set; otherwise nothing happens.
vm_terminate() {
  if [ -n "$RUNNER_ENVIRONMENT" ] &&
    [ -f "${RUNNER_ENVIRONMENT}/${RUNNER_ID}.tkn" ]; then
    if [ -n "${RUNNER_SECRET:-}" ]; then
      verbose "Requesting termination via ${RUNNER_ENVIRONMENT}/${RUNNER_ID}.trm"
      printf %s\\n "$RUNNER_SECRET" > "${RUNNER_ENVIRONMENT}/${RUNNER_ID}.trm"
      if [ -n "$RUNNER_PID" ]; then
        # shellcheck disable=SC2046 # We want to wait for all children
        waitpid $(ps_tree "$RUNNER_PID" | tac)
      else
        warning "No PID to wait for"
      fi
      return
    fi

    if [ -z "$RUNNER_PID" ]; then
      warning "No PID to wait for"
      return
    fi
  fi

  if [ -n "$RUNNER_PID" ]; then
    _vm_kill_and_wait
  fi
}

# Exit handler of the runner loop: terminate the microVM process recorded in
# RUNNER_PID (if any), then delete the microVM named by RUNNER_ID (if any).
cleanup() {
  # Set the EXIT trap to the empty action for the remainder of the shutdown.
  trap '' EXIT

  [ -z "$RUNNER_PID" ] || vm_terminate
  [ -z "$RUNNER_ID" ] || vm_delete "$RUNNER_ID"
}

trap cleanup EXIT


iteration=0
while true; do
RUNNER_ID="${loop}-$(random_string)"
vm_create "${RUNNER_ID}"
vm_start "${RUNNER_ID}"
vm_delete "${RUNNER_ID}"
RUNNER_ID=

if [ "$RUNNER_REPEAT" -gt 0 ]; then
iteration=$((iteration+1))
Expand Down
24 changes: 22 additions & 2 deletions runner/runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,12 @@ fi

# Start the runner.
verbose "Starting runner as user '$RUNNER_USER' (current user=$(id -un)): $*"
RUNNER_PID=
case "$RUNNER_USER" in
root)
if [ "$(id -u)" = "0" ]; then
"$@"
"$@" &
RUNNER_PID="$!"
else
error "Cannot start runner as root from non-root user"
fi
Expand All @@ -433,7 +435,8 @@ case "$RUNNER_USER" in
if [ "$(id -u)" = "0" ]; then
verbose "Starting runner as $RUNNER_USER"
chown -R "$RUNNER_USER" "$RUNNER_WORKDIR"
runas "$@"
runas "$@" &
RUNNER_PID="$!"
elif [ "$(id -un)" = "$RUNNER_USER" ]; then
"$@"
else
Expand All @@ -444,3 +447,20 @@ case "$RUNNER_USER" in
fi
;;
esac

# When the runner was started in the background, poll until it exits. While
# polling, and provided both RUNNER_TOKENFILE and RUNNER_SECRET are set, look
# for a termination file: same path as the token file, with its extension
# replaced by .trm. If its content equals RUNNER_SECRET, the runner is killed
# and runner_unregister is called; any other content only triggers a warning.
if [ -n "$RUNNER_PID" ]; then
  while [ -n "$(running "$RUNNER_PID")" ]; do
    if [ -n "${RUNNER_TOKENFILE:-}" ] && [ -n "${RUNNER_SECRET:-}" ]; then
      if [ -f "${RUNNER_TOKENFILE%.*}.trm" ]; then
        # NOTE: 'break' is a plain variable here, not the loop-control builtin.
        break=$(cat "${RUNNER_TOKENFILE%.*}.trm")
        if [ "$break" = "$RUNNER_SECRET" ]; then
          verbose "Termination file found, stopping runner"
          kill "$RUNNER_PID"
          runner_unregister 1
        else
          warning "Termination found at ${RUNNER_TOKENFILE%.*}.trm, but it does not match the secret"
        fi
      fi
    fi
    # Pace the polling: without a pause this loop would spin, forking ps and
    # awk continuously for as long as the runner is alive.
    sleep 1
  done
fi
efrecon marked this conversation as resolved.
Show resolved Hide resolved
Loading