Skip to content

Commit

Permalink
Wrapper code for nighthawk workload (#446)
Browse files Browse the repository at this point in the history
* adding dockerfile to generate nighthawk wrapper

* Wrapper code for nighthawk workload

* Resolved comments in the first iteration of PR-446

* Resolved comments in the second iteration of PR-446

* added README.md indicating steps to generate nighthawk-base image

* resolved PR comments on nit picks

---------

Co-authored-by: root <[email protected]>
Co-authored-by: Vishnu Challa <[email protected]>
  • Loading branch information
3 people authored Jan 27, 2023
1 parent 1201456 commit c40c5ce
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 0 deletions.
11 changes: 11 additions & 0 deletions snafu/benchmarks/nighthawk/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
ARG BASE_IMAGE=quay.io/cloud-bulldozer/nighthawk-base:latest
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y software-properties-common gcc && \
add-apt-repository -y ppa:deadsnakes/ppa
RUN apt-get update && apt-get install -y python3.6 python3-distutils python3-pip python3-apt
RUN apt-get update && apt-get install -y redis-server
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN mkdir -p /opt/snafu/
COPY . /opt/snafu/
RUN pip3 install --upgrade pip
RUN pip3 install -e /opt/snafu/
46 changes: 46 additions & 0 deletions snafu/benchmarks/nighthawk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Steps to generate nighthawk-base image

1. Create a ubuntu based pod using the below yaml
```
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: http-scale-client-test
name: http-scale-client-test
spec:
replicas: 1
selector:
matchLabels:
app: http-scale-client-test
template:
metadata:
labels:
app: http-scale-client-test
spec:
imagePullPolicy: Always
hostNetwork: true
tolerations:
- effect: NoSchedule
key: role
operator: Equal
value: workload
containers:
- name: ubuntu
image: ubuntu
command:
- sleep
- inf
```
2. Login to the host using ```oc rsh http-scale-client-test-[ID] /bin/bash```
3. And follow the installation steps here: https://github.com/envoyproxy/nighthawk#building-on-ubuntu
4. Once the binaries are generated copy them to your local file system using `oc cp` command.
5. Create a Dockerfile in the directory which contains binaries and add the following code snippet.
```
FROM ubuntu
LABEL description="This is custom image that contains nighthawk executables"
ENV LogLevel "info"
COPY * /usr/bin/
```
6. Now build the docker file and save the image to your desired image repository.
6 changes: 6 additions & 0 deletions snafu/benchmarks/nighthawk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# flake8: noqa
"""nighthawk benchmark"""
from snafu.benchmarks.nighthawk.nighthawk import Nighthawk

270 changes: 270 additions & 0 deletions snafu/benchmarks/nighthawk/nighthawk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Wrapper for running the nighthawk workload. See https://github.com/envoyproxy/nighthawk for more information."""
import dataclasses
import json
import os
import socket
import subprocess
from typing import Any, Dict, Iterable, List, Optional
from snafu.benchmarks import Benchmark, BenchmarkResult
from snafu.config import Config, ConfigArgument


@dataclasses.dataclass
class NighthawkStat:
"""Parsed Nighthawk Statistic."""

workload: str
uuid: str
user: str
cluster_name: str
duration: int
targets: List[str]
concurrency: int
connections: int
max_requests_per_connection: int
hostname: str
requested_qps: float
throughput: float
status_codes_1xx: float
status_codes_2xx: float
status_codes_3xx: float
status_codes_4xx: float
status_codes_5xx: float
p50_latency: float
p75_latency: float
p80_latency: float
p90_latency: float
p95_latency: float
p99_latency: float
p99_9_latency: float
avg_latency: float
timestamp: str
bytes_in: float
bytes_out: float
iteration: Optional[int] = None


@dataclasses.dataclass
class NighthawkConfig:
"""Container for common configuration options that are passed to Nighthawk."""

concurrency: Optional[int] = None
duration: Optional[int] = None
connections: Optional[int] = None
max_requests_per_connection: Optional[int] = None
rps: Optional[int] = None
kind: Optional[str] = None
url: Optional[str] = None

@classmethod
def new(cls, stdout: NighthawkStat, config: Config):
"""Create a new instance given instances of :py:mod:`~snafu.config.Config` and NighthawkStat."""

kwargs: Dict[str, Any] = {}
for fields in dataclasses.fields(cls):
val = getattr(stdout, fields.name, None)
if val is None:
val = getattr(config, fields.name, None)
kwargs[fields.name] = val
return cls(**kwargs)


class Nighthawk(Benchmark):
"""Wrapper for the nighthawk benchmark."""

tool_name = "nighthawk"
args = (
ConfigArgument(
"-s",
"--samples",
dest="samples",
env_var="SAMPLES",
default=1,
type=int,
help="Number of times to run the benchmark",
required=True,
),
ConfigArgument(
"--resourcetype",
dest="kind",
env_var="RESOURCETYPE",
help="Provide the resource type for nighthawk run - pod/vm/baremetal",
required=True,
),
ConfigArgument(
"--url",
dest="url",
env_var="URL",
help="Provide the url to make hits",
required=True,
),
ConfigArgument(
"--rps",
dest="rps",
env_var="RPS",
help="The target requests-per-second rate",
default=5,
type=int),
ConfigArgument(
"--max_requests_per_connection",
dest="max_requests_per_connection",
env_var="MAX_REQUESTS_PER_CONNECTION",
help="Max requests per connection",
default=4294937295,
type=int),
ConfigArgument(
"--connections",
dest="connections",
env_var="CONNECTIONS",
help="The maximum allowed number of concurrent connections per event loop",
default=100,
type=int),
ConfigArgument(
"--duration",
dest="duration",
env_var="DURATION",
help="The number of seconds that the test should run",
default=60,
type=int),
ConfigArgument(
"--concurrency",
dest="concurrency",
env_var="CONCURRENCY",
help="The number of concurrent event loops that should be used. Specify 'auto' to "\
"let Nighthawk leverage all vCPUs that have affinity to the Nighthawk process"\
". Note that increasing this results in an effective load multiplier combined"\
" with the configured --rps and --connections values",
default=1,
type=int),
)

def setup(self) -> bool:
"""Parse config and check for validations."""
self.config.parse_args()
self.logger.debug(f"Got config: {vars(self.config)}")

if not getattr(self.config, "user", False) or not getattr(self.config, "uuid", False):
self.logger.critical("Missing required metadata. Need both user and uuid to continue")
return False

return True

def _parse_stdout(self) -> NighthawkStat:
"""
Return parsed stdout of Nighthawk sample.
Returns
-------
NighthawkStat
"""

data = json.load(open("nighthawk.json"))
# populating latency in milliseconds and throughput as queries per second.
latency_percentiles = {}
duration_histogram = data['DurationHistogram']
for each_percentile in duration_histogram['Percentiles']:
percentile = str(each_percentile['Percentile'])
if percentile not in latency_percentiles.keys():
latency_percentiles[percentile] = 0
latency_percentiles[percentile] += each_percentile['Value'] * 1000

status_codes = { "1xx": 0, "2xx": 0, "3xx": 0, "4xx": 0, "5xx": 0 }
for key, value in data["RetCodes"].items():
status_code = int(key)
request_count = int(value)
if 100 <= status_code < 200:
status_codes["1xx"] += request_count
elif 200 <= status_code < 300:
status_codes["2xx"] += request_count
elif 300 <= status_code < 400:
status_codes["3xx"] += request_count
elif 400 <= status_code < 500:
status_codes["4xx"] += request_count
else:
status_codes["5xx"] += request_count

return NighthawkStat(
workload="nighthawk",
uuid=self.config.uuid,
user=self.config.user,
cluster_name=os.getenv("clustername", "mycluster"),
duration=int(self.config.duration),
targets=[self.config.url],
concurrency=self.config.concurrency,
connections=self.config.connections,
max_requests_per_connection=self.config.max_requests_per_connection,
hostname=socket.gethostname(),
requested_qps=data['RequestedQPS'],
throughput=data["ActualQPS"],
status_codes_1xx=status_codes["1xx"],
status_codes_2xx=status_codes["2xx"],
status_codes_3xx=status_codes["3xx"],
status_codes_4xx=status_codes["4xx"],
status_codes_5xx=status_codes["5xx"],
p50_latency=latency_percentiles.get("50", None),
p75_latency=latency_percentiles.get("75", None),
p80_latency=latency_percentiles.get("80", None),
p90_latency=latency_percentiles.get("90", None),
p95_latency=latency_percentiles.get("95", None),
p99_latency=latency_percentiles.get("99", None),
p99_9_latency=latency_percentiles.get("99.9", None),
avg_latency=duration_histogram['Avg'] * 1000,
timestamp=data['StartTime'],
bytes_in=float(data['BytesReceived']),
bytes_out=float(data['BytesSent'])
)

def _run_nighthawk(self):
"""
Method to execute nighthawk command.
"""

cmd = (
"nighthawk_client --concurrency {0} --duration {1} --connections {2} "
"--max-requests-per-connection {3} --rps {4} --output-format fortio {5} > nighthawk.json"
).format(self.config.concurrency, self.config.duration, self.config.connections,
self.config.max_requests_per_connection, self.config.rps, self.config.url)
self.logger.info(cmd)
p = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return p.stdout.strip().decode("utf-8"), p.stderr.strip().decode("utf-8"), p.returncode


def collect(self) -> Iterable[BenchmarkResult]:
"""
Run nighthawk benchmark ``self.config.samples`` number of times.
Returns immediately if a sample fails. Will attempt to Nighthawk run for each sample.
"""

_plural = "s" if self.config.samples > 1 else ""
self.logger.info(f"Collecting {self.config.samples} sample{_plural} of Nighthawk")

for s in range(1, self.config.samples + 1):
self.logger.info("Starting nighthawk sample %d out of %d with uuid %s" % (s, self.config.samples, self.config.uuid))
stdout, stderr, rc = self._run_nighthawk()
if rc:
self.logger.critical("Nighthawk failed with returncode %d, stopping benchmark" % rc)
self.logger.critical("stdout: %s" % stdout)
self.logger.critical("stderr: %s" % stderr)
exit(1)
parsed_data: NighthawkStat = self._parse_stdout()
parsed_data.iteration = s
config: NighthawkConfig = NighthawkConfig.new(parsed_data, self.config)
result: BenchmarkResult = self.create_new_result(
data=dataclasses.asdict(parsed_data),
config=dataclasses.asdict(config),
tag="results",
)
yield result
self.logger.info(f"{'-'*50}")
self.logger.info(f"Got sample result: {result}")
self.logger.info(f"{'-'*50}")
self.logger.info("Finished executing nighthawk sample %d out of %d" % (s, self.config.samples))
self.logger.info(f"Successfully collected {self.config.samples} sample{_plural} of nighthawk.")

@staticmethod
def cleanup() -> bool:
"""Nighthawk doesn't have any cleanup tasks, therefore this method just returns ``True``."""
return True

0 comments on commit c40c5ce

Please sign in to comment.