Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add log forwarding Pebble layer to sparkd #114

Merged
merged 6 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion images/charmed-spark/bin/sparkd.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
#!/bin/bash

function get_log_layer {
local loki_url=$1
local log_layer_file=${2-"/opt/pebble/log-layer.yaml"}
sed -e "s/\$LOKI_URL/$loki_url/g" \
-e "s/\$FLAVOUR/$FLAVOUR/g" \
-e "s/\$SPARK_APPLICATION_ID/$SPARK_APPLICATION_ID/g" \
-e "s/\$SPARK_USER/$SPARK_USER/g" \
-e "s/\$HOSTNAME/$HOSTNAME/g" \
$log_layer_file
}

function log_forwarding {
# We need to escape special characters from URL to be able to use with template.
local loki_url="$(<<< "$LOKI_URL" sed -e 's`[][\\/.*^$]`\\&`g')"
if [ ! -z "$loki_url" ]; then
echo "Log-forwarding to Loki is enabled."
local rendered_log_layer=$(get_log_layer $loki_url)
echo "$rendered_log_layer" | tee /tmp/rendered_log_layer.yaml
pebble add logging /tmp/rendered_log_layer.yaml
else
echo "Log-forwarding to Loki is disabled."
fi
}

function finish {
if [ $? -ne 0 ]
then
Expand All @@ -9,13 +33,13 @@ function finish {
}
trap finish EXIT


FLAVOUR=$1

echo "Running script with ${FLAVOUR} flavour"

case "${FLAVOUR}" in
driver|executor)
log_forwarding
pushd /opt/spark
./entrypoint.sh "$@"
;;
Expand Down
13 changes: 13 additions & 0 deletions images/charmed-spark/pebble/log-layer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
log-targets:
grafana-agent-k8s:
override: replace
type: loki
location: $LOKI_URL
services: [all]
labels:
product: charmed-spark
role: $FLAVOUR
app: spark
spark_job_id: $SPARK_APPLICATION_ID
user: $SPARK_USER
hostname: $HOSTNAME
rgildein marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 2 additions & 0 deletions images/charmed-spark/rockcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ parts:
bin/spark-client.service-account-registry: opt/spark-client/python/bin/spark-client.service-account-registry
bin/spark-client.spark-shell: opt/spark-client/python/bin/spark-client.spark-shell
bin/spark-client.spark-submit: opt/spark-client/python/bin/spark-client.spark-submit
pebble/log-layer.yaml: opt/pebble/log-layer.yaml
stage:
- etc/spark8t/conf/
- etc/spark/conf/
Expand All @@ -192,6 +193,7 @@ parts:
- opt/spark-client/python/bin/spark-client.service-account-registry
- opt/spark-client/python/bin/spark-client.spark-shell
- opt/spark-client/python/bin/spark-client.spark-submit
- opt/pebble/log-layer.yaml

user-setup:
plugin: nil
Expand Down
70 changes: 70 additions & 0 deletions tests/integration/integration-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ validate_metrics() {
fi
}

validate_logs() {
log=$1
if [ $(grep -Ri "Log-forwarding to Loki is enabled." $log | wc -l) -lt 3 ]; then
echo "ERROR: Could not validate logs."
echo "DEBUG: Log file:\n$(cat $log)"
exit 1
fi
if [ $(grep -Ri 'Layer \\\\"logging\\\\" added successfully from \\\\"/tmp/rendered_log_layer.yaml\\\\"' $log | wc -l) -lt 3 ]; then
echo "ERROR: Could not validate logs."
echo "DEBUG: Log file:\n$(cat $log)"
exit 1
fi
}

setup_user() {
echo "setup_user() ${1} ${2}"

Expand Down Expand Up @@ -422,6 +436,56 @@ run_example_job_in_pod_with_metrics() {
}


run_example_job_in_pod_with_log_forwarding() {
NAMESPACE=${1-$NAMESPACE}
USERNAME=${2-spark}
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"

PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods --sort-by=.metadata.creationTimestamp | grep driver | tail -n 1 | cut -d' ' -f1)
# start simple http server
LOG_FILE="/tmp/server-loki.log"
SERVER_PORT=9091
python3 tests/integration/resources/test_web_server.py $SERVER_PORT > $LOG_FILE &
HTTP_SERVER_PID=$!
# get ip address
IP_ADDRESS=$(hostname -I | cut -d ' ' -f 1)
echo "IP: $IP_ADDRESS"

kubectl -n $NAMESPACE exec testpod -- env PORT="$SERVER_PORT" IP="$IP_ADDRESS" UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
/bin/bash -c 'spark-client.spark-submit \
--username $UU --namespace $NN \
--conf spark.kubernetes.driver.request.cores=100m \
--conf spark.kubernetes.executor.request.cores=100m \
--conf spark.kubernetes.container.image=$IM \
--conf spark.executorEnv.LOKI_URL="http://$IP:$PORT" \
--conf spark.kubernetes.driverEnv.LOKI_URL="http://$IP:$PORT" \
--class org.apache.spark.examples.SparkPi \
local:///opt/spark/examples/jars/$JJ 1000'

# kubectl --kubeconfig=${KUBE_CONFIG} get pods
DRIVER_PODS=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver )
DRIVER_JOB=$(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | tail -n 1 | cut -d' ' -f1)

if [[ "${DRIVER_JOB}" == "${PREVIOUS_JOB}" ]]
then
echo "ERROR: Sample job has not run!"
exit 1
fi

# Check job output
# Sample output
# "Pi is roughly 3.13956232343"
pi=$(kubectl logs $(kubectl get pods --sort-by=.metadata.creationTimestamp -n ${NAMESPACE} | grep driver | tail -n 1 | cut -d' ' -f1) -n ${NAMESPACE} | grep 'Pi is roughly' | rev | cut -d' ' -f1 | rev | cut -c 1-3)
echo -e "Spark Pi Job Output: \n ${pi}"

validate_pi_value $pi
validate_logs $LOG_FILE

# kill http server
kill $HTTP_SERVER_PID
}


run_example_job_with_error_in_pod() {
SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"

Expand Down Expand Up @@ -657,6 +721,12 @@ echo -e "########################################"

(setup_user_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "########################################"
echo -e "RUN EXAMPLE JOB WITH LOG FORWARDING"
echo -e "########################################"

(setup_user_context && run_example_job_in_pod_with_log_forwarding && cleanup_user_success) || cleanup_user_failure_in_pod

echo -e "########################################"
echo -e "RUN EXAMPLE JOB WITH ERRORS"
echo -e "########################################"
Expand Down
Loading