Kubernetes benchmarks: Refactor kubectl context to allow sets of clusters.

Also add a sanity check to make sure each cluster works as part of initialization, by running a sample pod within it. PiperOrigin-RevId: 706069662
google · Dec 14, 2024 · f876a09 · f876a09
1 parent bd0cbf8
commit f876a09
Show file tree

Hide file tree

Showing 47 changed files with 1,302 additions and 664 deletions.
diff --git a/images/gpu/ollama/bench/Dockerfile.x86_64 b/images/gpu/ollama/bench/Dockerfile.x86_64
@@ -1,5 +1,5 @@
 # https://hub.docker.com/r/ollama/ollama
-FROM ollama/ollama:0.1.26
+FROM ollama/ollama:0.5.1
 
 ENV PATH=$PATH:/usr/local/nvidia/bin:/bin/nvidia/bin
 ENV OLLAMA_ORIGINS=*
@@ -8,17 +8,24 @@ ENV OLLAMA_HOST=0.0.0.0:11434
 COPY pull.sh /tmp
 
 # Pre-install models useful for benchmarking.
-# These are huge (total ~120 GiB), but necessary to benchmark
+# These are huge (total ~96 GiB), but necessary to benchmark
 # models of various sizes. They are in their own image file to
 # keep the test-only image lighter by comparison.
-RUN /tmp/pull.sh codellama:7b-instruct
-RUN /tmp/pull.sh codellama:34b-instruct
-RUN /tmp/pull.sh llama2-chinese:7b-chat
-RUN /tmp/pull.sh llama2:13b-chat
-RUN /tmp/pull.sh llama2:70b-chat
-RUN /tmp/pull.sh mistral:7b-instruct
-RUN /tmp/pull.sh mixtral:instruct
-RUN /tmp/pull.sh gemma:2b-instruct
-RUN /tmp/pull.sh gemma:7b-instruct
-RUN /tmp/pull.sh llava:7b-v1.6
-RUN /tmp/pull.sh llava:34b-v1.6
+
+# Useful as embedding model.
+RUN /tmp/pull.sh snowflake-arctic-embed2:568m-l-fp16
+
+# Useful as small model.
+RUN /tmp/pull.sh gemma2:2b-instruct-fp16
+
+# Useful as mid-size model.
+RUN /tmp/pull.sh sailor2:8b-chat-fp16
+
+# Useful as coding-specific model.
+RUN /tmp/pull.sh qwen2.5-coder:7b-instruct-q8_0
+
+# Useful as large model.
+RUN /tmp/pull.sh llama2:70b-chat-q4_K_S
+
+# Useful as vision model.
+RUN /tmp/pull.sh llama3.2-vision:11b-instruct-fp16
diff --git a/images/gpu/pytorch/Dockerfile.x86_64 b/images/gpu/pytorch/Dockerfile.x86_64
@@ -1,29 +1,42 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
-
-RUN apt-get update && apt-get install --yes \
-      python3 \
-      python3-distutils \
-      python3-pip \
-      clang \
-      wget \
-      vim \
-      git
-
-RUN python3 -m pip install --ignore-installed \
-      "clang~=$(clang --version | grep -oP 'clang version [.0-9]+' | cut -d' ' -f3)" \
-      torch \
-      torchvision \
-      lightning \
-      numpy \
-      memory_profiler
+FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
+
+# Used for determining the correct pip index URL below.
+ENV CUDA_VERSION=12.4
 
 ENV PYTORCH_DATASETS_DIR=/pytorch-data
 ENV TORCH_HOME=/pytorch-home
+RUN mkdir -p "$TORCH_HOME" && \
+    mkdir -p "$PYTORCH_DATASETS_DIR"
+
+RUN apt-get update && \
+    apt-get install --yes \
+        libgl1-mesa-glx libglib2.0-0 \
+        pkg-config \
+        python3 \
+        python3-distutils \
+        python3-pip \
+        clang \
+        wget \
+        vim \
+        git
+
+RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu$(echo "$CUDA_VERSION" | sed 's~\.~~g')" && \
+    python3 -m pip install --ignore-installed \
+        boto3 \
+        "clang~=$(clang --version | grep -oP 'clang version [.0-9]+' | cut -d' ' -f3)" \
+        lightning \
+        matplotlib \
+        memory_profiler \
+        numba && \
+    python3 -m pip install --ignore-installed \
+        torch \
+        torchvision \
+        torchaudio \
+        numpy \
+        --index-url "$PIP_INDEX_URL"
+
 COPY download_pytorch_datasets.py /tmp/
-# Some PyTorch examples hardcode the data directory to "data", so
-# make a symlink for that too.
-RUN mkdir "$PYTORCH_DATASETS_DIR" && \
-    python3 /tmp/download_pytorch_datasets.py && \
+RUN python3 /tmp/download_pytorch_datasets.py && \
     rm /tmp/download_pytorch_datasets.py
 
 RUN PYTORCH_EXAMPLES_COMMIT=30b310a977a82dbfc3d8e4a820f3b14d876d3bd2 && \
@@ -38,3 +51,40 @@ RUN PYTORCH_EXAMPLES_COMMIT=30b310a977a82dbfc3d8e4a820f3b14d876d3bd2 && \
 
 COPY *.py /
 RUN rm /download_pytorch_datasets.py
+
+RUN PYTORCH_BENCHMARKS_COMMIT=675fb8f537d302a4fef3ed2a67349209e65046ac && \
+    mkdir /pytorch-benchmark && \
+    cd /pytorch-benchmark && \
+    git init && \
+    git remote add origin https://github.com/pytorch/benchmark.git && \
+    git fetch --depth 1 origin "$PYTORCH_BENCHMARKS_COMMIT" && \
+    git checkout FETCH_HEAD
+
+# Note that mobilenet_v2 does not have a requirements.txt file.
+RUN cd /pytorch-benchmark && \
+    python3 -m pip install --ignore-installed \
+        -r requirements.txt \
+        -r torchbenchmark/models/LearningToPaint/requirements.txt \
+        -r torchbenchmark/models/fastNLP_Bert/requirements.txt \
+        -r torchbenchmark/models/hf_BigBird/requirements.txt \
+        -r torchbenchmark/models/speech_transformer/requirements.txt
+
+# These benchmarks are chosen based on diversity of the type of model and their
+# profile with respect to using the GPU and moving data. For more context, see
+# this paper: https://arxiv.org/pdf/2304.14226.pdf
+RUN cd /pytorch-benchmark && \
+    python3 install.py \
+        LearningToPaint \
+        fastNLP_Bert \
+        hf_BigBird \
+        speech_transformer \
+        mobilenet_v2
+
+# Some of these benchmarks download a dataset at runtime.
+# Run them once on CPU just to get this predownloaded into the image.
+RUN cd /pytorch-benchmark && \
+    python3 run.py LearningToPaint --device cpu && \
+    python3 run.py fastNLP_Bert --device cpu && \
+    python3 run.py hf_BigBird --device cpu && \
+    python3 run.py speech_transformer --device cpu && \
+    python3 run.py mobilenet_v2 --device cpu
diff --git a/test/benchmarks/tools/parser_util.go b/test/benchmarks/tools/parser_util.go
@@ -48,26 +48,80 @@ func ParametersToName(params ...Parameter) (string, error) {
 }
 
 // NameToParameters parses the string created by ParametersToName and returns
-// it as a set of Parameters.
-// Example: BenchmarkRuby/server_threads.1/doc_size.16KB-6
-// The parameter part of this benchmark is:
-// "server_threads.1/doc_size.16KB" (BenchmarkRuby is the name, and 6 is GOMAXPROCS)
-// This function will return a slice with two parameters ->
-// {Name: server_threads, Value: 1}, {Name: doc_size, Value: 16KB}
-func NameToParameters(name string) ([]*Parameter, error) {
+// the name components and parameters contained within.
+// The separator between the name and value may either be '.' or '='.
+//
+// Example: "BenchmarkRuby/SubTest/LevelTwo/server_threads.1/doc_size.16KB-6"
+// The parameter part of this benchmark is "server_threads.1/doc_size.16KB",
+// whereas "BenchmarkRuby/SubTest/LevelTwo" is the name, and the "-6" suffix is
+// GOMAXPROCS (optional, may be omitted).
+// This function will return a slice of the name components of the benchmark:
+//
+//	[
+//	  "BenchmarkRuby",
+//	  "SubTest",
+//	  "LevelTwo",
+//	]
+//
+// and a slice of the parameters:
+//
+//	[
+//	  {Name: "server_threads", Value: "1"},
+//	  {Name: "doc_size", Value: "16KB"},
+//	  {Name: "GOMAXPROCS", Value: "6"},
+//	]
+//
+// (and a nil error).
+func NameToParameters(name string) ([]string, []*Parameter, error) {
 	var params []*Parameter
-	for _, cond := range strings.Split(name, "/") {
-		cs := strings.Split(cond, ".")
+	var separator string
+	switch {
+	case strings.IndexRune(name, '.') != -1 && strings.IndexRune(name, '=') != -1:
+		return nil, nil, fmt.Errorf("ambiguity while parsing parameters from benchmark name %q: multiple types of parameter separators are present", name)
+	case strings.IndexRune(name, '.') != -1:
+		separator = "."
+	case strings.IndexRune(name, '=') != -1:
+		separator = "="
+	default:
+		// No separator; use '=' which we know is not present in the name,
+		// but we still need to process the name (even if unparameterized) in
+		// order to possibly extract GOMAXPROCS.
+		separator = "="
+	}
+	var nameComponents []string
+	var firstParameterCond string
+	var goMaxProcs *Parameter
+	split := strings.Split(name, "/")
+	for i, cond := range split {
+		if isLast := i == len(split)-1; isLast {
+			// On the last component, if it contains a dash, it is a GOMAXPROCS value.
+			if dashSplit := strings.Split(cond, "-"); len(dashSplit) >= 2 {
+				goMaxProcs = &Parameter{Name: "GOMAXPROCS", Value: dashSplit[len(dashSplit)-1]}
+				cond = strings.Join(dashSplit[:len(dashSplit)-1], "-")
+			}
+		}
+		cs := strings.Split(cond, separator)
 		switch len(cs) {
 		case 1:
-			params = append(params, &Parameter{Name: cond, Value: cond})
+			if firstParameterCond != "" {
+				return nil, nil, fmt.Errorf("failed to parse params from %q: a non-parametrized component %q was found after a parametrized one %q", name, cond, firstParameterCond)
+			}
+			nameComponents = append(nameComponents, cond)
 		case 2:
+			if firstParameterCond == "" {
+				firstParameterCond = cond
+			}
 			params = append(params, &Parameter{Name: cs[0], Value: cs[1]})
 		default:
-			return nil, fmt.Errorf("failed to parse param: %s", cond)
+			return nil, nil, fmt.Errorf("failed to parse params from %q: %s", name, cond)
 		}
 	}
-	return params, nil
+	if goMaxProcs != nil {
+		// GOMAXPROCS should always be last in order to match the ordering of the
+		// benchmark name.
+		params = append(params, goMaxProcs)
+	}
+	return nameComponents, params, nil
 }
 
 // ReportCustomMetric reports a metric in a set format for parsing.
@@ -93,9 +147,52 @@ func ParseCustomMetric(value, metric string) (*Metric, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse value: %v", err)
 	}
-	nameUnit := strings.Split(metric, ".")
-	if len(nameUnit) != 2 {
-		return nil, fmt.Errorf("failed to parse metric: %s", metric)
+	separators := []rune{'-', '.'}
+	var separator string
+	for _, sep := range separators {
+		if strings.ContainsRune(metric, sep) {
+			if separator != "" {
+				return nil, fmt.Errorf("failed to parse metric: ambiguous unit separator: %q (is the separator %q or %q?)", metric, separator, string(sep))
+			}
+			separator = string(sep)
+		}
+	}
+	var name, unit string
+	switch separator {
+	case "":
+		unit = metric
+	default:
+		components := strings.Split(metric, separator)
+		name, unit = strings.Join(components[:len(components)-1], ""), components[len(components)-1]
+	}
+	// Normalize some unit names to benchstat defaults.
+	switch unit {
+	case "":
+		return nil, fmt.Errorf("failed to parse metric %q: no unit specified", metric)
+	case "s":
+		unit = "sec"
+	case "nanos":
+		unit = "ns"
+	case "byte":
+		unit = "B"
+	case "bit":
+		unit = "b"
+	default:
+		// Otherwise, leave unit as-is.
+	}
+	// If the metric name is unspecified, it can sometimes be inferred from
+	// the unit.
+	if name == "" {
+		switch unit {
+		case "sec":
+			name = "duration"
+		case "req/sec", "tok/sec":
+			name = "throughput"
+		case "B/sec":
+			name = "bandwidth"
+		default:
+			return nil, fmt.Errorf("failed to parse metric %q: ambiguous metric name, please format the unit as 'name.unit' or 'name-unit'", metric)
+		}
 	}
-	return &Metric{Name: nameUnit[0], Unit: nameUnit[1], Sample: sample}, nil
+	return &Metric{Name: name, Unit: unit, Sample: sample}, nil
 }