Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding a benchmark to run cpuid tool on VMs to retrieve the information about the processor. #5319

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.next.md
Original file line number Diff line number Diff line change
Expand Up @@ -521,3 +521,4 @@
- Update OpenJDK version to default on Debian based OSes and latest on Red Hat
based OSes.
- Update Chromium version to 127.0.6533.88.
- Add cpuid_tool benchmark, which runs the cpuid tool on a VM to report processor information.
259 changes: 259 additions & 0 deletions perfkitbenchmarker/linux_benchmarks/cpuid_tool_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs cpuid as a benchmark."""

import configparser
from typing import List, NamedTuple, OrderedDict

from perfkitbenchmarker import benchmark_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import sample


# Benchmark name; passed to configs.LoadConfig together with BENCHMARK_CONFIG
# (see GetConfig).
BENCHMARK_NAME = 'cpuid_tool'
# Default YAML configuration for the benchmark: a 'default' VM group with
# per-cloud (GCP / Azure / AWS) machine type and zone.
# NOTE(review): the YAML nesting below appears to have lost its indentation in
# this view -- confirm against the original file before relying on it.
BENCHMARK_CONFIG = """
cpuid_tool:
description: Runs cpuid as a benchmark
vm_groups:
default:
vm_spec:
GCP:
machine_type: n2-standard-2
zone: us-central1-c
Azure:
machine_type: Standard_D2s_v5
zone: eastus2-2
AWS:
machine_type: m5.large
zone: us-east-1c
"""


def GetConfig(user_config):
  """Loads this benchmark's configuration through configs.LoadConfig.

  Args:
    user_config: user-supplied configuration, forwarded unchanged to
      configs.LoadConfig.

  Returns:
    The value returned by configs.LoadConfig for BENCHMARK_CONFIG and
    BENCHMARK_NAME.
  """
  loaded_config = configs.LoadConfig(
      BENCHMARK_CONFIG, user_config, BENCHMARK_NAME
  )
  return loaded_config


def Prepare(_):
  """Does nothing; this benchmark has no separate prepare phase."""
  return None


def _BuildCpuidSample(metric: str, value: str, version: str) -> sample.Sample:
  """Wraps a single cpuid field in a Sample.

  The numeric sample value is always -1 with an empty unit; the reported
  field is carried as a string in the sample metadata instead.

  Args:
    metric: name of the cpuid field.
    value: string value reported by cpuid for that field.
    version: cpuid version string attached to every sample.

  Returns:
    The populated Sample.
  """
  return sample.Sample(
      metric=metric,
      value=-1,
      unit='',
      metadata={
          'version': version,
          'value': value,
      },
  )


def Run(bm_spec: benchmark_spec.BenchmarkSpec) -> List[sample.Sample]:
  """Runs cpuid on the first VM of the 'default' group and collects samples.

  cpuid is invoked twice, pinned to CPU 0 with taskset: once for the decoded
  output and once with -r for the raw register dump. Every parsed field
  becomes one sample whose metadata holds the cpuid version and the field
  value.

  Args:
    bm_spec: benchmark spec; only vm_groups['default'][0] is used.

  Returns:
    One Sample per decoded field (metric '<section>-<key>') followed by one
    Sample per raw register value (metric '<leaf>_<subleaf>_<register>').
  """
  # TODO(arushigaur): Get bugs from /proc/cpuinfo and add them as metadata.
  vm = bm_spec.vm_groups['default'][0]
  vm.InstallPackages('cpuid')
  version, _ = vm.RemoteCommand('cpuid -v')
  # Command stdout normally ends with a newline; strip it so the version
  # stored in every sample's metadata is a clean single-line string.
  version = version.strip()
  decoded_stdout, _ = vm.RemoteCommand('taskset -c 0 cpuid -1')
  ascii_parsed_output = ParseCpuIdASCIIOutput(decoded_stdout)
  raw_stdout, _ = vm.RemoteCommand('taskset -c 0 cpuid -r -1')
  hexadecimal_parsed_output = ParseCpuIdHexadecimalOutput(raw_stdout)

  samples = []
  for section in ascii_parsed_output.sections():
    # Prefix each key with its section so metrics stay unique across sections.
    samples.extend(
        _BuildCpuidSample(section + '-' + key, value, version)
        for key, value in ascii_parsed_output[section].items()
    )
  samples.extend(
      _BuildCpuidSample(key, value, version)
      for key, value in hexadecimal_parsed_output.items()
  )
  return samples


def Cleanup(_):
  """Does nothing; this benchmark leaves nothing to clean up."""
  return None


def ParseCpuIdHexadecimalOutput(output: str) -> dict:
  """Parses the raw (hexadecimal) cpuid output into key value pairs.

  Example line:
  0x00000000 0x00: eax=0x0000000d ebx=0x68747541 ecx=0x444d4163 edx=0x69746e65
  key = 0x00000000_0x00_eax
  value = 0x0000000d
  key = 0x00000000_0x00_ebx
  value = 0x68747541 (and so on)

  Lines that do not contain ': ' (for example the 'CPU:' header) are ignored.

  Args:
    output: hexadecimal output of cpuid_tool.

  Returns:
    A dictionary mapping '<leaf>_<subleaf>_<register>' to the register value
    string.

  Raises:
    ValueError: if the same key is produced more than once.
  """
  data = {}
  for line in output.splitlines():
    key, separator, values = line.partition(': ')
    if not separator:
      continue
    key = key.strip().replace(' ', '_')
    # split() without an argument collapses runs of whitespace and drops
    # leading/trailing blanks, so stray or trailing spaces cannot yield empty
    # tokens that would break the register=value parsing below.
    for register_value in values.split():
      subkey, equals, value = register_value.partition('=')
      if not equals:
        # Not a register=value token; there is nothing to record for it.
        continue
      complete_key = '_'.join([key, subkey])
      if complete_key in data:
        # Key is unique, adding this exception if key is not unique in future.
        raise ValueError('Duplicate key found: %s' % complete_key)
      data[complete_key] = value
  return data


def IsHeading(line: str):
  """Tells whether a cpuid output line is a heading.

  A line counts as a heading when it ends with a colon or contains '---',
  which covers these shapes:
  1. CPU:
  2. version information (1/eax):
  3. --- cache 0 ---

  Args:
    line: line to check.

  Returns:
    True if the line is a heading, False otherwise.
  """
  return '---' in line or line.endswith(':')


def CountSpaces(line: str):
  """Returns the number of leading whitespace characters in the line."""
  without_indent = line.lstrip()
  return len(line) - len(without_indent)


def FormatKeyValueLine(line: str) -> str:
  """Rewrites a cpuid key/value line as 'key = value' for configparser.

  Shapes that are handled:
  1. VMPL: VM permission levels = false
  2. 0x63: data TLB: 2M/4M pages, 4-way, 32 entries
  3. (multi-processing synth) = hyper-threaded (t=2)

  Colons and parentheses are dropped everywhere; when the line has no '=',
  its first colon is used as the key/value separator. Spaces inside the key
  are replaced with underscores.

  Args:
    line: line containing key value pair.

  Returns:
    Config parser compatible key value pair.

  Raises:
    ValueError: if the line contains neither '=' nor ':'.
  """
  if '=' not in line:
    # Promote the first colon (if any) to the key/value separator.
    line = line.replace(':', '=', 1)
  # Remaining colons and all parentheses are stripped in one pass.
  cleaned = line.translate(str.maketrans('', '', ':()'))
  name, content = cleaned.split('=', 1)
  return name.strip().replace(' ', '_') + ' = ' + content.strip()


def FormatHeading(line: str) -> str:
  """Turns a cpuid heading line into a configparser-friendly name.

  Args:
    line: line containing heading.

  Example headings:
  1. CPU:  ->  CPU
  2. version information (1/eax):  ->  version_information
  3. --- cache 0 ---  ->  cache_0

  Returns:
    Formatted heading
  """
  without_markers = line.replace(':', '').replace('---', '')
  pieces = (piece.strip() for piece in without_markers.split(' '))
  tokens = [token for token in pieces if token]
  # A trailing parenthesised token such as "(1/eax)" is only an annotation,
  # so "version information (1/eax)" is reduced to "version information".
  if len(tokens) > 1 and '(' in tokens[-1]:
    tokens.pop()
  # Join with underscores (version_information) and drop any parentheses
  # that are still part of the remaining words.
  return '_'.join(tokens).replace('(', '').replace(')', '')


def BracketizeHeading(line: str) -> str:
  """Wraps the heading in square brackets, the configparser section syntax."""
  return '[' + line + ']'


class MultiOrderedDict(OrderedDict):
  """Ordered dict that merges list values assigned to an existing key.

  Assigning a list to a key that is already present extends the stored value
  with the new items; every other assignment behaves like a normal dict
  assignment.
  """

  def __setitem__(self, key, value):
    should_merge = key in self and isinstance(value, list)
    if not should_merge:
      super().__setitem__(key, value)
      return
    # Append to the value already stored instead of replacing it.
    self[key].extend(value)


def ParseCpuIdASCIIOutput(output: str):
  """Parses the decoded cpuid output into a configparser object.

  The output is lower-cased and rewritten as INI text: every heading becomes
  a section and every other non-blank line becomes a 'key = value' entry.
  Nesting is derived from leading whitespace, and a nested heading is named
  '<parent section>-<heading>'.

  Args:
    output: decoded (non-raw) output of cpuid.

  Returns:
    A configparser.RawConfigParser holding the parsed sections.
  """
  HeadingEntry = NamedTuple(
      'heading_details',
      [('heading', str), ('spaces', int)],
  )
  # Open headings from outermost to innermost; the last entry is the closest
  # ancestor of the line being processed.
  ancestors = []
  ini_lines = []
  for raw_line in output.splitlines():
    lowered = raw_line.lower()
    if not IsHeading(lowered):
      # Blank lines are passed through untouched.
      ini_lines.append(
          FormatKeyValueLine(lowered) if lowered.strip() else lowered
      )
      continue
    indent = CountSpaces(lowered)
    # Discard headings that are not less indented than this one; whatever
    # remains on top is this heading's parent.
    while ancestors and ancestors[-1].spaces >= indent:
      ancestors.pop()
    own_name = FormatHeading(lowered).strip()
    if ancestors:
      full_name = ancestors[-1].heading + '-' + own_name
    else:
      full_name = own_name
    ancestors.append(HeadingEntry(full_name, indent))
    # configparser expects section names in square brackets, e.g. [cpu].
    ini_lines.append(BracketizeHeading(full_name))
  parser = configparser.RawConfigParser(
      dict_type=MultiOrderedDict, strict=False
  )
  parser.read_string('\n'.join(ini_lines))
  return parser
12 changes: 12 additions & 0 deletions perfkitbenchmarker/linux_packages/cpuid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Module containing cpuid installation."""


def YumInstall(vm):
  """Installs CPUID on the VM.

  Args:
    vm: VM to install on; its InstallPackages method is given the package
      name.
  """
  # InstallPackages takes package names, not a shell command line; passing
  # 'sudo dnf install cpuid' would request packages named 'sudo', 'dnf' and
  # 'install' as well.
  vm.InstallPackages('cpuid')


def AptInstall(vm):
  """Installs CPUID on the VM.

  Args:
    vm: VM to install on; commands are issued through its RemoteCommand
      method.
  """
  # apt-get is the script-friendly front end, and -y answers the confirmation
  # prompt that would otherwise stall or abort a non-interactive install.
  vm.RemoteCommand('sudo apt-get update')
  vm.RemoteCommand('sudo apt-get install -y cpuid')
106 changes: 106 additions & 0 deletions tests/linux_benchmarks/cpuid_tool_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import unittest

from perfkitbenchmarker import test_util
from perfkitbenchmarker.linux_benchmarks import cpuid_tool_benchmark as cpuid_tool
from tests import pkb_common_test_case


class CpuidToolTest(
    pkb_common_test_case.PkbCommonTestCase, test_util.SamplesTestMixin
):
  """Unit tests for the output parsers of the cpuid_tool benchmark."""

  def testCpuIdToolOutputParser(self):
    """Checks section names and one section's keys for decoded cpuid output."""
    # NOTE(review): ParseCpuIdASCIIOutput derives heading nesting from leading
    # whitespace, but the fixture below shows none in this view -- confirm the
    # original indentation of the sample output was preserved.
    partial_cpuid_output = """
CPU:
vendor_id = "GenuineIntel"
version information (1/eax):
processor type = primary processor (0)
family = 0x6 (6)
model = 0xf (15)
stepping id = 0x0 (0)
extended family = 0x0 (0)
extended model = 0x4 (4)
(family synth) = 0x6 (6)
(model synth) = 0x4f (79)
miscellaneous (1/ebx):
process local APIC physical ID = 0x0 (0)
maximum IDs for CPUs in pkg = 0x20 (32)
CLFLUSH line size = 0x8 (8)
brand index = 0x0 (0)
cache and TLB information (2):
0x63: data TLB: 2M/4M pages, 4-way, 32 entries
data TLB: 1G pages, 4-way, 4 entries
0x03: data TLB: 4K pages, 4-way, 64 entries
0x76: instruction TLB: 2M/4M pages, fully, 8 entries
0xff: cache data is in CPUID leaf 4
0xb5: instruction TLB: 4K, 8-way, 64 entries
0xf0: 64 byte prefetching
0xc3: L2 TLB: 4K/2M pages, 6-way, 1536 entries
processor serial number = 0004-06F0-0000-0000-0000-0000
Feature Extended Size (0x80000008/edx):
max page count for INVLPGB instruction = 0x0 (0)
RDPRU instruction max input support = 0x0 (0)
(instruction supported synth):
MWAIT = false
deterministic cache parameters (4):
--- cache 0 ---
cache type = data cache (1)
cache level = 0x1 (1)
self-initializing cache level = true
fully associative cache = false
(multi-processing synth) = multi-core (c=12), hyper-threaded (t=2)
(multi-processing method) = Intel leaf 0xb
(APIC widths synth): CORE_width=4 SMT_width=1
(APIC synth): PKG_ID=0 CORE_ID=0 SMT_ID=0
(uarch synth) = Intel Broadwell {shrink of Haswell}, 14nm
(synth) = Intel Xeon E5-1600 / E5-2600 / E5-4600 v4 (Broadwell-E) / E7-4800 / E7-8800 v4 (Broadwell-EX) {shrink of Haswell}, 14nm
"""
    output = cpuid_tool.ParseCpuIdASCIIOutput(partial_cpuid_output)
    # Section names are lower-cased; nested headings are joined to their
    # parent with '-'.
    self.assertEqual(
        output.sections(),
        [
            'cpu',
            'cpu-version_information',
            'cpu-miscellaneous',
            'cpu-cache_and_tlb_information',
            'cpu-feature_extended_size',
            'cpu-instruction_supported_synth',
            'cpu-deterministic_cache_parameters',
            'cpu-deterministic_cache_parameters-cache_0',
        ],
    )
    # Keys of the innermost cache section, in the order they were parsed.
    self.assertEqual(
        list(dict(output['cpu-deterministic_cache_parameters-cache_0']).keys()),
        [
            'cache_type',
            'cache_level',
            'self-initializing_cache_level',
            'fully_associative_cache',
            'multi-processing_synth',
            'multi-processing_method',
            'apic_widths_synth_core_width',
            'apic_synth_pkg_id',
            'uarch_synth',
            'synth',
        ],
    )

  def testHexaDecimalParser(self):
    """Checks that raw register lines map to '<leaf>_<subleaf>_<reg>' keys."""
    cpuid_output = """
CPU:
0x00000000 0x00: eax=0x0000000d ebx=0x68747541 ecx=0x444d4163 edx=0x69746e65
0x00000001 0x00: eax=0x00830f10 ebx=0x0a180800 ecx=0xfef83203 edx=0x178bfbff
0x00000002 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
0x00000004 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
"""
    data = cpuid_tool.ParseCpuIdHexadecimalOutput(cpuid_output)
    # All four registers of the first line, plus one from the last line.
    self.assertEqual(data['0x00000000_0x00_eax'], '0x0000000d')
    self.assertEqual(data['0x00000000_0x00_ebx'], '0x68747541')
    self.assertEqual(data['0x00000000_0x00_ecx'], '0x444d4163')
    self.assertEqual(data['0x00000000_0x00_edx'], '0x69746e65')
    self.assertEqual(data['0x00000004_0x00_edx'], '0x00000000')

# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
  unittest.main()