diff --git a/tools/Makefile b/tools/Makefile index 722b261..52a8dae 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -44,8 +44,18 @@ endif tar -xf async-profiler-$(ASYNC_PROFILER_VERSION)-linux-x64.tar.gz && mv async-profiler-$(ASYNC_PROFILER_VERSION)-linux-x64 async-profiler endif -# Use a local copy of avx-turbo source until the upstream makes the required changes +AVX_TURBO_VERSION := "9cfe8bf" avx-turbo: +ifeq ("$(wildcard avx-turbo)","") + git clone https://github.com/travisdowns/avx-turbo.git +else + # discard patches applied by a previous run so checkout/pull/apply can be repeated + cd avx-turbo && git reset --hard && git checkout master && git pull +endif + cd avx-turbo && git checkout $(AVX_TURBO_VERSION) + # apply our patches to avx-turbo, aperf/mperf first because it doesn't add/remove any lines + cd avx-turbo && git apply ../avx-turbo-patches/0001-use-fixed-CPU-number-to-determine-if-APERF-MPERF-are.patch + cd avx-turbo && git apply ../avx-turbo-patches/0001-Add-CPU-ID-pinning-option.patch cd avx-turbo && make # if you change the version, check the sed hacks below diff --git a/tools/avx-turbo-patches/0001-Add-CPU-ID-pinning-option.patch b/tools/avx-turbo-patches/0001-Add-CPU-ID-pinning-option.patch new file mode 100644 index 0000000..220a926 --- /dev/null +++ b/tools/avx-turbo-patches/0001-Add-CPU-ID-pinning-option.patch @@ -0,0 +1,82 @@ +From aedb82d512237e761abd909bbd5d9036a3b39bef Mon Sep 17 00:00:00 2001 +From: "Harper, Jason M" +Date: Fri, 30 May 2025 08:05:00 -0700 +Subject: [PATCH] Add CPU ID pinning option + +Signed-off-by: Harper, Jason M +--- + avx-turbo.cpp | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +diff --git a/avx-turbo.cpp b/avx-turbo.cpp +index c7d4dc0..c794b7b 100644 +--- a/avx-turbo.cpp ++++ b/avx-turbo.cpp +@@ -189,7 +189,7 @@ args::ValueFlag arg_min_threads{parser, "MIN", "The minimum number of threa + args::ValueFlag arg_max_threads{parser, "MAX", "The maximum number of threads to use", {"max-threads"}}; + args::ValueFlag arg_num_cpus{parser, "CPUS", "Override number of available CPUs",
{"num-cpus"}}; + args::ValueFlag arg_warm_ms{parser, "MILLISECONDS", "Warmup milliseconds for each thread after pinning (default 100)", {"warmup-ms"}, 100}; +- ++args::ValueFlag arg_cpuids{parser, "CPUIDS", "Pin threads to comma-separated list of CPU IDs (default sequential ids)", {"cpuids"}}; + + bool verbose; + +@@ -614,6 +614,7 @@ struct warmup { + + struct test_thread { + size_t id; ++ size_t cpu_id; + hot_barrier* start_barrier; + hot_barrier* stop_barrier; + +@@ -627,8 +628,8 @@ struct test_thread { + + std::thread thread; + +- test_thread(size_t id, hot_barrier& start_barrier, hot_barrier& stop_barrier, const test_func *test, size_t iters, bool use_aperf) : +- id{id}, start_barrier{&start_barrier}, stop_barrier{&stop_barrier}, test{test}, ++ test_thread(size_t id, size_t cpu_id, hot_barrier& start_barrier, hot_barrier& stop_barrier, const test_func *test, size_t iters, bool use_aperf) : ++ id{id}, cpu_id{cpu_id}, start_barrier{&start_barrier}, stop_barrier{&stop_barrier}, test{test}, + iters{iters}, use_aperf{use_aperf}, thread{std::ref(*this)} + { + // if (verbose) printf("Constructed test in thread %lu, this = %p\n", id, this); +@@ -641,7 +642,7 @@ struct test_thread { + void operator()() { + // if (verbose) printf("Running test in thread %lu, this = %p\n", id, this); + if (!arg_no_pin) { +- pin_to_cpu(id); ++ pin_to_cpu(cpu_id); + } + aperf_ghz aperf_timer; + outer_timer& outer = use_aperf ? 
static_cast(aperf_timer) : dummy_outer::dummy; +@@ -839,6 +840,18 @@ int main(int argc, char** argv) { + zeroupper(); + auto specs = filter_tests(isas_supported, cpus); + ++ // parse comma-separated list of cpu_ids; threads beyond the end of the list are pinned to their own index ++ std::vector cpu_ids; ++ if (arg_cpuids) { ++ for (auto& id : split(arg_cpuids.Get(), ",")) { ++ cpu_ids.push_back(std::atoi(id.c_str())); ++ } ++ } else { ++ for (int i = 0; i < (int)cpus.size(); i++) { ++ cpu_ids.push_back(i); ++ } ++ } ++ + size_t last_thread_count = -1u; + std::vector results_list; + for (auto& spec : specs) { +@@ -857,7 +870,7 @@ int main(int argc, char** argv) { + std::deque threads; + hot_barrier start{spec.count()}, stop{spec.count()}; + for (auto& test : spec.thread_funcs) { +- threads.emplace_back(threads.size(), start, stop, &test, iters, use_aperf); ++ threads.emplace_back(threads.size(), threads.size() < cpu_ids.size() ? static_cast<size_t>(cpu_ids[threads.size()]) : threads.size(), start, stop, &test, iters, use_aperf); + } + + results_list.emplace_back(&spec); +-- +2.34.1 + diff --git a/tools/avx-turbo-patches/0001-use-fixed-CPU-number-to-determine-if-APERF-MPERF-are.patch b/tools/avx-turbo-patches/0001-use-fixed-CPU-number-to-determine-if-APERF-MPERF-are.patch new file mode 100644 index 0000000..b090193 --- /dev/null +++ b/tools/avx-turbo-patches/0001-use-fixed-CPU-number-to-determine-if-APERF-MPERF-are.patch @@ -0,0 +1,28 @@ +From 261bb31a67dd21834150fdbbad07d1f094407752 Mon Sep 17 00:00:00 2001 +From: "Harper, Jason M" +Date: Fri, 30 May 2025 06:59:56 -0700 +Subject: [PATCH] use fixed CPU number to determine if APERF/MPERF are + supported + +--- + avx-turbo.cpp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/avx-turbo.cpp b/avx-turbo.cpp +index c7d4dc0..96bc1c2 100644 +--- a/avx-turbo.cpp ++++ b/avx-turbo.cpp +@@ -279,8 +279,8 @@ struct aperf_ghz : outer_timer { + */ + static bool is_supported() { + uint64_t dummy; +- return read_msr_cur_cpu(MSR_IA32_MPERF, &dummy) == 0 +- && read_msr_cur_cpu(MSR_IA32_APERF, &dummy) == 0; ++ return read_msr(1,
MSR_IA32_MPERF, &dummy) == 0 ++ && read_msr(1, MSR_IA32_APERF, &dummy) == 0; + } + + virtual void start() override { +-- +2.34.1 + diff --git a/tools/avx-turbo/.github/workflows/build.yml b/tools/avx-turbo/.github/workflows/build.yml deleted file mode 100644 index 848b472..0000000 --- a/tools/avx-turbo/.github/workflows/build.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: build -on: [push] -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-20.04] - cpp_compiler: [g++, g++-7, g++-8, g++-9, clang++, clang++-9] - include: - - c_compiler: gcc - - cpp_compiler: g++-7 - c_compiler: gcc-7 - - cpp_compiler: g++-8 - c_compiler: gcc-8 - - cpp_compiler: g++-9 - c_compiler: gcc-9 - - cpp_compiler: clang++ - c_compiler: clang - - cpp_compiler: clang++-9 - c_compiler: clang-9 - steps: - - name: Install C Compiler - if: ${{ startsWith(matrix.c_compiler, 'gcc-') || startsWith(matrix.c_compiler, 'clang-') }} - run: | - sudo apt-get update - sudo apt-get install -y ${{ matrix.c_compiler }} - - name: Install C++ Compiler - if: ${{ startsWith(matrix.cpp_compiler, 'g++-') || startsWith(matrix.cpp_compiler, 'clang++-') }} - run: | - sudo apt-get update - sudo apt-get install -y ${{ matrix.cpp_compiler }} - - uses: actions/checkout@v2 - - run: lscpu - - run: make -j2 CC=${{ matrix.c_compiler }} CXX=${{ matrix.cpp_compiler }} - - run: ./unit-test - - run: ./avx-turbo --no-pin --max-threads 2 - diff --git a/tools/avx-turbo/.gitignore b/tools/avx-turbo/.gitignore deleted file mode 100644 index d79da86..0000000 --- a/tools/avx-turbo/.gitignore +++ /dev/null @@ -1,59 +0,0 @@ -# Prerequisites -*.d - -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app -avx-turbo -unit-test - -# Eclipse CDT artifacts -.cproject -.project -.settings 
- -# nasm listing file -*.list - -# libpfm4 directory -libpfm-4*/* - -# core files -/core* -/vgcore* - -# perf files -/perf.* - -/tmp/* - -dummy.rebuild - -# local make config file -local.mk diff --git a/tools/avx-turbo/.travis.yml b/tools/avx-turbo/.travis.yml deleted file mode 100644 index fa6dfbb..0000000 --- a/tools/avx-turbo/.travis.yml +++ /dev/null @@ -1,70 +0,0 @@ -dist: xenial -language: cpp -sudo: false -branches: - except: - - /^(wip\/)?(appveyor|msvc|mingw|windows)(\-.+)?$/ - -addons: - apt: - sources: &default_sources - - ubuntu-toolchain-r-test - -# the anchors element doesn't do anything but itself except define some anchors to be used later as aliases -anchors: - - &unit_command ./unit-test - - &uarch_command ./uarch-bench --test-tag=~slow - -matrix: - include: - # clang-5.0 is the default installed on travis VMs - - compiler: clang-default - env: TRUE_CC=clang TRUE_CXX=clang++ CXXFLAGS=-stdlib=libc++ - addons: - apt: - sources: - - *default_sources - - llvm-toolchain-trusty-5.0 - packages: - - libc++abi1 - - libc++1 - - compiler: gcc-4.9 - env: TRUE_CC=gcc-4.9 TRUE_CXX=g++-4.9 - addons: - apt: - sources: - - *default_sources - packages: - - gcc-4.9 - - g++-4.9 - - compiler: gcc-6 - env: TRUE_CC=gcc-6 TRUE_CXX=g++-6 - addons: - apt: - sources: - - *default_sources - packages: - - gcc-6 - - g++-6 - - compiler: clang-6.0 - env: TRUE_CC=clang-6.0 TRUE_CXX=clang++-6.0 - addons: - apt: - sources: - - *default_sources - - llvm-toolchain-xenial-6.0 - packages: - - clang-6.0 - - clang++-6.0 - -before_install: -# Travis will set CC and CXX after the env commands specified in the matrix are run, overwriting whatever -# we've specified there, so we need to reset them here. See also https://github.com/travis-ci/travis-ci/issues/6633 . 
- - CC="${TRUE_CC:-$CC}" - - CXX="${TRUE_CXX:-$CXX}" - - -script: -- set -e && echo "CC is ${CC}, CXX is ${CXX}" && ${CC} --version && ${CXX} --version -- make -- ./avx-turbo --no-pin --max-threads 2 diff --git a/tools/avx-turbo/LICENSE b/tools/avx-turbo/LICENSE deleted file mode 100644 index 3b5b974..0000000 --- a/tools/avx-turbo/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2018 travisdowns - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/tools/avx-turbo/Makefile b/tools/avx-turbo/Makefile deleted file mode 100644 index 606a785..0000000 --- a/tools/avx-turbo/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -include config.mk - -# rebuild when makefile changes --include dummy.rebuild - -.PHONY: all clean - -ASM_FLAGS ?= -DNASM_ENABLE_DEBUG=$(NASM_DEBUG) -w+all -l x86_methods.list - -ifneq ($(CPU_ARCH),) -ARCH_FLAGS := -march=$(CPU_ARCH) -endif -O_LEVEL ?= -O2 - -COMMON_FLAGS := -MMD -Wall -Wextra -Wundef $(ARCH_FLAGS) -g $(O_LEVEL) -CPPFLAGS := $(COMMON_FLAGS) -CFLAGS := $(COMMON_FLAGS) - -SRC_FILES := $(wildcard *.cpp) $(wildcard *.c) - -OBJECTS := $(SRC_FILES:.cpp=.o) asm-methods.o -OBJECTS := $(OBJECTS:.c=.o) -DEPFILES = $(OBJECTS:.o=.d) -# $(info OBJECTS=$(OBJECTS)) - -VPATH = test - -########### -# Targets # -########### - -all: avx-turbo unit-test - --include $(DEPFILES) unit-test.d - -clean: - rm -f *.d *.o avx-turbo - -dist-clean: clean $(CLEAN_TARGETS) - -unit-test: unit-test.o unit-test-main.o cpuid.o - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $(LDLIBS) -std=c++11 $^ -o $@ - -avx-turbo: $(OBJECTS) - $(CXX) $(OBJECTS) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $(LDLIBS) -std=c++11 -lpthread -o $@ - -%.o : %.c - $(CC) $(CFLAGS) -c -std=c11 -o $@ $< - -%.o : %.cpp - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -std=c++11 -o $@ $< - -%.o: %.asm nasm-utils-inc.asm - $(ASM) $(ASM_FLAGS) -f elf64 $< - -LOCAL_MK = $(wildcard local.mk) - -# https://stackoverflow.com/a/3892826/149138 -dummy.rebuild: Makefile config.mk $(LOCAL_MK) - touch $@ - $(MAKE) -s clean diff --git a/tools/avx-turbo/README.md b/tools/avx-turbo/README.md deleted file mode 100644 index 425754f..0000000 --- a/tools/avx-turbo/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# avx-turbo - -Test the non-AVX, AVX2 and AVX-512 speeds for various types of CPU intensive loops with varying scalar and SIMD instructions, across different active core counts. 
- -Currently it is **Linux only** (it does run on WSL and WSL2 on Windows), but the basic testing mechanism could be ported to OSX and Windows as well (help welcome). - -# CI Status - -**Build:** [![Master Branch](https://github.com/travisdowns/avx-turbo/workflows/build/badge.svg)](https://github.com/travisdowns/avx-turbo/actions?query=workflow%3Abuild+branch%3Amaster) - - -# build - - make - -# msr kernel module - -You should load the `msr` kernel module if it is not already loaded. This is as simple as: - - modprobe msr - -Or as complex as (if you want nice messages about what happened): - - lsmod | grep -q msr && echo "MSR already loaded" || { echo "Loading MSR module"; sudo modprobe msr ; } - -# run - -You get the most info running as root (since we can read various MSRs to calculate the frequency directly): - - sudo ./avx-turbo - -You can also run it without root, but you only get the "Mops" reading (but this can be read directly as frequency -for the 1-latency tests). - -## spec-based tests - -The default behavior for ./avx-turbo is to run tests with various thread counts, but with the same test on each thread. For example, the `avx256_fma` test means that the same FMA-using test code will be run on _each_ test thread. - -An alternate approach is availe with so-called _spec-based_ tests. This lets you specificy exactly what each thread in a test will run. The general form of a specification is: `test1/thead-count1[,test2/thread-count2[,...]]`. For example, -if you run `sudo ./avx-turbo --spec avx256_fma/1,scalar_iadd/3` you'll get one copy of `avx256_fma` and three copies of `scalar_iadd` running in parallel. - -This mode is useful to testing that happens when not all cores are doing the same thing. 
- -# help - -Try: - - ./avx-turbo --help - -for a summary of some options something like this: - -``` - ./avx-turbo {OPTIONS} - - avx-turbo: Determine AVX2 and AVX-512 downclocking behavior - - OPTIONS: - - -h, --help Display this help menu - --force-tsc-calibrate Force manual TSC calibration loop, even - if cpuid TSC Hz is available - --no-pin Don't try to pin threads to CPU - gives - worse results but works around affinity - issues on TravisCI - --verbose Output more info - --no-barrier Don't sync up threads before each test - (no real purpose) - --list List the available tests and their - descriptions - --allow-hyperthreads By default we try to filter down the - available cpus to include only physical - cores, but with this option we'll use - all logical cores meaning you'll run two - tests on cores with hyperthreading - --test=[TEST-ID] Run only the specified test (by ID) - --spec=[SPEC] Run a specific type of test specified by - a specification string - --iters=[ITERS] Run the test loop ITERS times (default - 100000) - --min-threads=[MIN] The minimum number of threads to use - --max-threads=[MAX] The maximum number of threads to use - --warmup-ms=[MILLISECONDS] Warmup milliseconds for each thread - after pinning (default 100) - -``` - -# output - -The output looks like this: - -``` -Running as root : [YES] -CPU supports AVX2 : [YES] -CPU supports AVX-512: [NO ] -cpuid = eax = 2, ebx = 216, ecx = 0, edx = 0 -cpu: family = 6, model = 94, stepping = 3 -tsc_freq = 2592.0 MHz (from cpuid leaf 0x15) -Will test up to 4 CPUs -============================== Threads: 1 ============================== -ID | Description | Mops | A/M-ratio | A/M-MHz | M/tsc-ratio -scalar_iadd | Scalar integer adds | 2594 | 1.00 | 2592 | 1.00 -avx128_iadd | 128-bit integer adds | 2594 | 1.00 | 2592 | 1.00 -avx128_imul | 128-bit integer muls | 519 | 1.00 | 2592 | 1.00 -avx128_fma | 128-bit 64-bit FMAs | 649 | 1.00 | 2592 | 1.00 -avx256_iadd | 256-bit integer adds | 2594 | 1.00 | 2592 | 1.00 
-avx256_imul | 256-bit integer muls | 519 | 1.00 | 2592 | 1.00 -avx256_fma | 256-bit serial DP FMAs | 648 | 1.00 | 2592 | 1.00 -avx256_fma_t | 256-bit parallel DP FMAs | 5189 | 1.00 | 2592 | 1.00 -========================================================================= - -============================== Threads: 2 ============================== -ID | Description | Mops | A/M-ratio | A/M-MHz | M/tsc-ratio -scalar_iadd | Scalar integer adds | 2593, 2593 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx128_iadd | 128-bit integer adds | 2594, 2594 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx128_imul | 128-bit integer muls | 519, 519 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx128_fma | 128-bit 64-bit FMAs | 648, 649 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx256_iadd | 256-bit integer adds | 2594, 2594 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx256_imul | 256-bit integer muls | 519, 519 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx256_fma | 256-bit serial DP FMAs | 648, 648 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -avx256_fma_t | 256-bit parallel DP FMAs | 5188, 5189 | 1.00, 1.00 | 2592, 2592 | 1.00, 1.00 -========================================================================= - -============================== Threads: 3 ============================== -ID | Description | Mops | A/M-ratio | A/M-MHz | M/tsc-ratio -scalar_iadd | Scalar integer adds | 2594, 2594, 2594 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx128_iadd | 128-bit integer adds | 2594, 2594, 2594 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx128_imul | 128-bit integer muls | 519, 519, 519 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx128_fma | 128-bit 64-bit FMAs | 649, 648, 648 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx256_iadd | 256-bit integer adds | 2594, 2594, 2594 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx256_imul | 256-bit integer muls | 519, 519, 519 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx256_fma | 
256-bit serial DP FMAs | 649, 648, 649 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -avx256_fma_t | 256-bit parallel DP FMAs | 5190, 5189, 5190 | 1.00, 1.00, 1.00 | 2592, 2592, 2592 | 1.00, 1.00, 1.00 -========================================================================= - -============================== Threads: 4 ============================== -ID | Description | Mops | A/M-ratio | A/M-MHz | M/tsc-ratio -scalar_iadd | Scalar integer adds | 2594, 2594, 2594, 2594 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx128_iadd | 128-bit integer adds | 2593, 2594, 2594, 2594 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx128_imul | 128-bit integer muls | 519, 519, 519, 519 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx128_fma | 128-bit 64-bit FMAs | 648, 648, 649, 648 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx256_iadd | 256-bit integer adds | 2594, 2594, 2594, 2594 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx256_imul | 256-bit integer muls | 519, 519, 519, 519 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx256_fma | 256-bit serial DP FMAs | 648, 648, 648, 648 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -avx256_fma_t | 256-bit parallel DP FMAs | 5189, 5189, 5189, 5189 | 1.00, 1.00, 1.00, 1.00 | 2592, 2592, 2592, 2592 | 1.00, 1.00, 1.00, 1.00 -========================================================================= -``` - -The headings are: - - - `ID` The ID for the test, which you can use with the `--test` argument to only run a specific test (handy when you want to focus on one test to read the frequency externally, e.g., via `perf`). - - `Description` Yes, it's a description. - - `Mops` Million operations per second. 
Every test runs a loop of the same type of instruction and this is how many millions of those instructions were executed per second. This is handy since this value corresponds exactly to frequency in MHz for tests with serially dependent 1-latency instructions, which here are all the "integer adds" tests. - - `A/M` This is the ratio of the `APERF` and `MPERF` ratios exposed in an MSR. For details, see the [Intel SDM Vol 3](https://software.intel.com/en-us/download/intel-64-and-ia-32-architectures-sdm-combined-volumes-3a-3b-3c-and-3d-system-programming-guide), but basically APERF is a free running counter of actual cycles (i.e., varying with the CPU frequency), while MPERF counts at a constant rate, usually the processor's nominal frequency. A ratio of 1.0 therefore means that the CPU was is running, on average, at the nominal frequency during the test (I had turbo off, that's why you see 1.00 everywhere). Lower than 1 means lower than nominal frequencies (e.g., due to running heavy AVX code). - - `A/M-MHz` This is the measured frequency over the duration of the test, based on the `APERF` and `MPERF` ratio described above, multiplied by the base (TSC) frequency. Note that this only counts "non-halted" periods, so if the CPU was running at 1000 MHz half the time but halted the other half of the time (due to a frequency transition), you'd see 1000 MHz here, not 500 MHz. - - `M/tsc-ratio` This shows the ration of the `MPERF` register to the TSC (time stamp counter) over the duration of the test. These counters count at the same rate, except that `MPERF` only counts "unhalted" cycles, while the TSC counts all cycles, so this ratio gives you an indication of the "lost" cycles due to halt events. 
A big source of halt events is frequency transitions in the turbo range: on my Skylake client CPU, any time another core starts up, the allowed turbo ratio changes, so the CPU halts for perhaps 20,000 cycles, so with moderate activity I often see ratios of 0.9 which means that 10% of the time my CPU is doing nothing. To get a "true" frequency, you should multiply this ratio by the `A/M-MHz` column, which would be the actual average frequency, counting halted periods as zero. diff --git a/tools/avx-turbo/args.hxx b/tools/avx-turbo/args.hxx deleted file mode 100644 index c3752b4..0000000 --- a/tools/avx-turbo/args.hxx +++ /dev/null @@ -1,2602 +0,0 @@ -/* Copyright (c) 2016-2017 Taylor C. Richberger and Pavel - * Belikov - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -/** \file args.hxx - * \brief this single-header lets you use all of the args functionality - * - * The important stuff is done inside the args namespace - */ - -#ifndef ARGS_HXX -#define ARGS_HXX - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef ARGS_TESTNAMESPACE -namespace argstest -{ -#else - -/** \namespace args - * \brief contains all the functionality of the args library - */ -namespace args -{ -#endif - /** Getter to grab the value from the argument type. - * - * If the Get() function of the type returns a reference, so does this, and - * the value will be modifiable. - */ - template - auto get(Option &option_) -> decltype(option_.Get()) - { - return option_.Get(); - } - - /** (INTERNAL) Count UTF-8 glyphs - * - * This is not reliable, and will fail for combinatory glyphs, but it's - * good enough here for now. - * - * \param string The string to count glyphs from - * \return The UTF-8 glyphs in the string - */ - inline std::string::size_type Glyphs(const std::string &string_) - { - std::string::size_type length = 0; - for (const char c: string_) - { - if ((c & 0xc0) != 0x80) - { - ++length; - } - } - return length; - } - - /** (INTERNAL) Wrap a string into a vector of lines - * - * This is quick and hacky, but works well enough. 
You can specify a - * different width for the first line - * - * \param width The width of the body - * \param the width of the first line, defaults to the width of the body - * \return the vector of lines - */ - inline std::vector Wrap(const std::string &in, const std::string::size_type width, std::string::size_type firstlinewidth = 0) - { - // Preserve existing line breaks - const auto newlineloc = in.find('\n'); - if (newlineloc != in.npos) - { - auto first = Wrap(std::string(in, 0, newlineloc), width); - auto second = Wrap(std::string(in, newlineloc + 1), width); - first.insert( - std::end(first), - std::make_move_iterator(std::begin(second)), - std::make_move_iterator(std::end(second))); - return first; - } - if (firstlinewidth == 0) - { - firstlinewidth = width; - } - auto currentwidth = firstlinewidth; - - std::istringstream stream(in); - std::vector output; - std::ostringstream line; - std::string::size_type linesize = 0; - while (stream) - { - std::string item; - stream >> item; - auto itemsize = Glyphs(item); - if ((linesize + 1 + itemsize) > currentwidth) - { - if (linesize > 0) - { - output.push_back(line.str()); - line.str(std::string()); - linesize = 0; - currentwidth = width; - } - } - if (itemsize > 0) - { - if (linesize) - { - ++linesize; - line << " "; - } - line << item; - linesize += itemsize; - } - } - if (linesize > 0) - { - output.push_back(line.str()); - } - return output; - } - -#ifdef ARGS_NOEXCEPT - /// Error class, for when ARGS_NOEXCEPT is defined - enum class Error - { - None, - Usage, - Parse, - Validation, - Required, - Map, - Extra, - Help - }; -#else - /** Base error class - */ - class Error : public std::runtime_error - { - public: - Error(const std::string &problem) : std::runtime_error(problem) {} - virtual ~Error() {}; - }; - - /** Errors that occur during usage - */ - class UsageError : public Error - { - public: - UsageError(const std::string &problem) : Error(problem) {} - virtual ~UsageError() {}; - }; - - /** Errors that 
occur during regular parsing - */ - class ParseError : public Error - { - public: - ParseError(const std::string &problem) : Error(problem) {} - virtual ~ParseError() {}; - }; - - /** Errors that are detected from group validation after parsing finishes - */ - class ValidationError : public Error - { - public: - ValidationError(const std::string &problem) : Error(problem) {} - virtual ~ValidationError() {}; - }; - - /** Errors that when a required flag is omitted - */ - class RequiredError : public ValidationError - { - public: - RequiredError(const std::string &problem) : ValidationError(problem) {} - virtual ~RequiredError() {}; - }; - - /** Errors in map lookups - */ - class MapError : public ParseError - { - public: - MapError(const std::string &problem) : ParseError(problem) {} - virtual ~MapError() {}; - }; - - /** Error that occurs when a singular flag is specified multiple times - */ - class ExtraError : public ParseError - { - public: - ExtraError(const std::string &problem) : ParseError(problem) {} - virtual ~ExtraError() {}; - }; - - /** An exception that indicates that the user has requested help - */ - class Help : public Error - { - public: - Help(const std::string &flag) : Error(flag) {} - virtual ~Help() {}; - }; -#endif - - /** A simple unified option type for unified initializer lists for the Matcher class. 
- */ - struct EitherFlag - { - const bool isShort; - const char shortFlag; - const std::string longFlag; - EitherFlag(const std::string &flag) : isShort(false), shortFlag(), longFlag(flag) {} - EitherFlag(const char *flag) : isShort(false), shortFlag(), longFlag(flag) {} - EitherFlag(const char flag) : isShort(true), shortFlag(flag), longFlag() {} - - /** Get just the long flags from an initializer list of EitherFlags - */ - static std::unordered_set GetLong(std::initializer_list flags) - { - std::unordered_set longFlags; - for (const EitherFlag &flag: flags) - { - if (!flag.isShort) - { - longFlags.insert(flag.longFlag); - } - } - return longFlags; - } - - /** Get just the short flags from an initializer list of EitherFlags - */ - static std::unordered_set GetShort(std::initializer_list flags) - { - std::unordered_set shortFlags; - for (const EitherFlag &flag: flags) - { - if (flag.isShort) - { - shortFlags.insert(flag.shortFlag); - } - } - return shortFlags; - } - }; - - - - /** A class of "matchers", specifying short and flags that can possibly be - * matched. - * - * This is supposed to be constructed and then passed in, not used directly - * from user code. 
- */ - class Matcher - { - private: - const std::unordered_set shortFlags; - const std::unordered_set longFlags; - - public: - /** Specify short and long flags separately as iterators - * - * ex: `args::Matcher(shortFlags.begin(), shortFlags.end(), longFlags.begin(), longFlags.end())` - */ - template - Matcher(ShortIt shortFlagsStart, ShortIt shortFlagsEnd, LongIt longFlagsStart, LongIt longFlagsEnd) : - shortFlags(shortFlagsStart, shortFlagsEnd), - longFlags(longFlagsStart, longFlagsEnd) - {} - - /** Specify short and long flags separately as iterables - * - * ex: `args::Matcher(shortFlags, longFlags)` - */ - template - Matcher(Short &&shortIn, Long &&longIn) : - shortFlags(std::begin(shortIn), std::end(shortIn)), longFlags(std::begin(longIn), std::end(longIn)) - {} - - /** Specify a mixed single initializer-list of both short and long flags - * - * This is the fancy one. It takes a single initializer list of - * any number of any mixed kinds of flags. Chars are - * automatically interpreted as short flags, and strings are - * automatically interpreted as long flags: - * - * args::Matcher{'a'} - * args::Matcher{"foo"} - * args::Matcher{'h', "help"} - * args::Matcher{"foo", 'f', 'F', "FoO"} - */ - Matcher(std::initializer_list in) : - shortFlags(EitherFlag::GetShort(in)), longFlags(EitherFlag::GetLong(in)) {} - - Matcher(Matcher &&other) : shortFlags(std::move(other.shortFlags)), longFlags(std::move(other.longFlags)) - {} - - ~Matcher() {} - - /** (INTERNAL) Check if there is a match of a short flag - */ - bool Match(const char flag) const - { - return shortFlags.find(flag) != shortFlags.end(); - } - - /** (INTERNAL) Check if there is a match of a long flag - */ - bool Match(const std::string &flag) const - { - return longFlags.find(flag) != longFlags.end(); - } - - /** (INTERNAL) Get all flag strings as a vector, with the prefixes embedded - */ - std::vector GetFlagStrings(const std::string &shortPrefix, const std::string &longPrefix) const - { - std::vector 
flagStrings; - flagStrings.reserve(shortFlags.size() + longFlags.size()); - for (const char flag: shortFlags) - { - flagStrings.emplace_back(shortPrefix + std::string(1, flag)); - } - for (const std::string &flag: longFlags) - { - flagStrings.emplace_back(longPrefix + flag); - } - return flagStrings; - } - - /** (INTERNAL) Get all flag strings as a vector, with the prefixes and names embedded - */ - std::vector GetFlagStrings(const std::string &shortPrefix, const std::string &longPrefix, const std::string &name, const std::string &shortSeparator, const std::string longSeparator) const - { - const std::string bracedname(std::string("[") + name + "]"); - std::vector flagStrings; - flagStrings.reserve(shortFlags.size() + longFlags.size()); - for (const char flag: shortFlags) - { - flagStrings.emplace_back(shortPrefix + std::string(1, flag) + shortSeparator + bracedname); - } - for (const std::string &flag: longFlags) - { - flagStrings.emplace_back(longPrefix + flag + longSeparator + bracedname); - } - return flagStrings; - } - }; - - enum class Options - { - /** Default options. - */ - None = 0x0, - - /** Flag can't be passed multiple times. - */ - Single = 0x01, - - /** Flag can't be omitted. - */ - Required = 0x02, - - /** Flag is excluded from help output. 
- */ - Hidden = 0x04, - }; - - inline Options operator | (Options lhs, Options rhs) - { - return static_cast(static_cast(lhs) | static_cast(rhs)); - } - - inline Options operator & (Options lhs, Options rhs) - { - return static_cast(static_cast(lhs) & static_cast(rhs)); - } - - /** Base class for all match types - */ - class Base - { - private: - const Options options; - - protected: - bool matched; - const std::string help; -#ifdef ARGS_NOEXCEPT - /// Only for ARGS_NOEXCEPT - Error error; -#endif - - public: - Base(const std::string &help_, Options options_ = {}) : options(options_), matched(false), help(help_) {} - virtual ~Base() {} - - Options GetOptions() const noexcept - { - return options; - } - - virtual bool Matched() const noexcept - { - return matched; - } - - virtual void Validate(const std::string &, const std::string &) - { - } - - operator bool() const noexcept - { - return Matched(); - } - - virtual std::tuple GetDescription(const std::string &, const std::string &, const std::string &, const std::string &) const - { - std::tuple description; - std::get<1>(description) = help; - return description; - } - - virtual void Reset() noexcept - { - matched = false; -#ifdef ARGS_NOEXCEPT - error = Error::None; -#endif - } - -#ifdef ARGS_NOEXCEPT - /// Only for ARGS_NOEXCEPT - virtual Error GetError() const - { - return error; - } -#endif - }; - - /** Base class for all match types that have a name - */ - class NamedBase : public Base - { - protected: - const std::string name; - bool kickout; - - public: - NamedBase(const std::string &name_, const std::string &help_, Options options_ = {}) : Base(help_, options_), name(name_), kickout(false) {} - virtual ~NamedBase() {} - - virtual std::tuple GetDescription(const std::string &, const std::string &, const std::string &, const std::string &) const override - { - std::tuple description; - std::get<0>(description) = Name(); - std::get<1>(description) = help; - return description; - } - - virtual std::string 
Name() const - { - return name; - } - - /// Sets a kick-out value for building subparsers - void KickOut(bool kickout_) noexcept - { - this->kickout = kickout_; - } - - /// Gets the kick-out value for building subparsers - bool KickOut() const noexcept - { - return kickout; - } - }; - - struct Nargs - { - const size_t min; - const size_t max; - - Nargs(size_t min_, size_t max_) : min(min_), max(max_) - { -#ifndef ARGS_NOEXCEPT - if (max < min) - { - throw std::invalid_argument("Nargs: max > min"); - } -#endif - } - - Nargs(size_t num_) : min(num_), max(num_) - { - } - }; - - /** Base class for all flag options - */ - class FlagBase : public NamedBase - { - protected: - const Matcher matcher; - - public: - FlagBase(const std::string &name_, const std::string &help_, Matcher &&matcher_, const bool extraError_ = false) : NamedBase(name_, help_, extraError_ ? Options::Single : Options()), matcher(std::move(matcher_)) {} - - FlagBase(const std::string &name_, const std::string &help_, Matcher &&matcher_, Options options_) : NamedBase(name_, help_, options_), matcher(std::move(matcher_)) {} - - virtual ~FlagBase() {} - - virtual FlagBase *Match(const std::string &flag) - { - if (matcher.Match(flag)) - { - if ((GetOptions() & Options::Single) != Options::None && matched) - { -#ifdef ARGS_NOEXCEPT - error = Error::Extra; -#else - std::ostringstream problem; - problem << "Flag '" << flag << "' was passed multiple times, but is only allowed to be passed once"; - throw ExtraError(problem.str()); -#endif - } - matched = true; - return this; - } - return nullptr; - } - - virtual void Validate(const std::string &shortPrefix, const std::string &longPrefix) override - { - if (!Matched() && (GetOptions() & Options::Required) != Options::None) - { -#ifdef ARGS_NOEXCEPT - error = Error::Required; -#else - std::ostringstream problem; - problem << "Flag '" << matcher.GetFlagStrings(shortPrefix, longPrefix).at(0) << "' is required"; - throw RequiredError(problem.str()); -#endif - } - } 
- - virtual FlagBase *Match(const char flag) - { - if (matcher.Match(flag)) - { - if ((GetOptions() & Options::Single) != Options::None && matched) - { -#ifdef ARGS_NOEXCEPT - error = Error::Extra; -#else - std::ostringstream problem; - problem << "Flag '" << flag << "' was passed multiple times, but is only allowed to be passed once"; - throw ExtraError(problem.str()); -#endif - } - matched = true; - return this; - } - return nullptr; - } - - virtual std::tuple GetDescription(const std::string &shortPrefix, const std::string &longPrefix, const std::string &, const std::string &) const override - { - std::tuple description; - const auto flagStrings = matcher.GetFlagStrings(shortPrefix, longPrefix); - std::ostringstream flagstream; - for (auto it = std::begin(flagStrings); it != std::end(flagStrings); ++it) - { - if (it != std::begin(flagStrings)) - { - flagstream << ", "; - } - flagstream << *it; - } - std::get<0>(description) = flagstream.str(); - std::get<1>(description) = help; - return description; - } - - /** Defines how many values can be consumed by this option. - * - * \return closed interval [min, max] - */ - virtual Nargs NumberOfArguments() const noexcept = 0; - - /** Parse values of this option. - * - * \param value Vector of values. It's size must be in NumberOfArguments() interval. 
- */ - virtual void ParseValue(const std::vector &value) = 0; - }; - - /** Base class for value-accepting flag options - */ - class ValueFlagBase : public FlagBase - { - public: - ValueFlagBase(const std::string &name_, const std::string &help_, Matcher &&matcher_, const bool extraError_ = false) : FlagBase(name_, help_, std::move(matcher_), extraError_) {} - ValueFlagBase(const std::string &name_, const std::string &help_, Matcher &&matcher_, Options options_) : FlagBase(name_, help_, std::move(matcher_), options_) {} - virtual ~ValueFlagBase() {} - - virtual std::tuple GetDescription(const std::string &shortPrefix, const std::string &longPrefix, const std::string &shortSeparator, const std::string &longSeparator) const override - { - std::tuple description; - const auto flagStrings = matcher.GetFlagStrings(shortPrefix, longPrefix, Name(), shortSeparator, longSeparator); - std::ostringstream flagstream; - for (auto it = std::begin(flagStrings); it != std::end(flagStrings); ++it) - { - if (it != std::begin(flagStrings)) - { - flagstream << ", "; - } - flagstream << *it; - } - std::get<0>(description) = flagstream.str(); - std::get<1>(description) = help; - return description; - } - - virtual Nargs NumberOfArguments() const noexcept override - { - return 1; - } - }; - - /** Base class for positional options - */ - class PositionalBase : public NamedBase - { - protected: - bool ready; - - public: - PositionalBase(const std::string &name_, const std::string &help_, Options options_ = Options::None) : NamedBase(name_, help_, options_), ready(true) {} - virtual ~PositionalBase() {} - - bool Ready() - { - return ready; - } - - virtual void ParseValue(const std::string &value_) = 0; - - virtual void Reset() noexcept override - { - matched = false; - ready = true; -#ifdef ARGS_NOEXCEPT - error = Error::None; -#endif - } - - virtual void Validate(const std::string &, const std::string &) override - { - if ((GetOptions() & Options::Required) != Options::None && !Matched()) - 
{ -#ifdef ARGS_NOEXCEPT - error = Error::Required; -#else - std::ostringstream problem; - problem << "Option '" << Name() << "' is required"; - throw RequiredError(problem.str()); -#endif - } - } - }; - - /** Class for all kinds of validating groups, including ArgumentParser - */ - class Group : public Base - { - private: - std::vector children; - std::function validator; - - public: - /** Default validators - */ - struct Validators - { - static bool Xor(const Group &group) - { - return group.MatchedChildren() == 1; - } - - static bool AtLeastOne(const Group &group) - { - return group.MatchedChildren() >= 1; - } - - static bool AtMostOne(const Group &group) - { - return group.MatchedChildren() <= 1; - } - - static bool All(const Group &group) - { - return group.Children().size() == group.MatchedChildren(); - } - - static bool AllOrNone(const Group &group) - { - return (All(group) || None(group)); - } - - static bool AllChildGroups(const Group &group) - { - return std::find_if(std::begin(group.Children()), std::end(group.Children()), [](const Base* child) -> bool { - return dynamic_cast(child) && !child->Matched(); - }) == std::end(group.Children()); - } - - static bool DontCare(const Group &) - { - return true; - } - - static bool CareTooMuch(const Group &) - { - return false; - } - - static bool None(const Group &group) - { - return group.MatchedChildren() == 0; - } - }; - /// If help is empty, this group will not be printed in help output - Group(const std::string &help_ = std::string(), const std::function &validator_ = Validators::DontCare) : Base(help_), validator(validator_) {} - /// If help is empty, this group will not be printed in help output - Group(Group &group_, const std::string &help_ = std::string(), const std::function &validator_ = Validators::DontCare) : Base(help_), validator(validator_) - { - group_.Add(*this); - } - virtual ~Group() {} - - /** Return the first FlagBase that matches flag, or nullptr - * - * \param flag The flag with prefixes 
stripped - * \return the first matching FlagBase pointer, or nullptr if there is no match - */ - template - FlagBase *Match(const T &flag) - { - for (Base *child: children) - { - if (FlagBase *flagBase = dynamic_cast(child)) - { - if (FlagBase *match = flagBase->Match(flag)) - { - return match; - } - } else if (Group *group = dynamic_cast(child)) - { - if (FlagBase *match = group->Match(flag)) - { - return match; - } - } - } - return nullptr; - } - - virtual void Validate(const std::string &shortPrefix, const std::string &longPrefix) override - { - for (Base *child: children) - { - child->Validate(shortPrefix, longPrefix); - } - } - - /** Get the next ready positional, or nullptr if there is none - * - * \return the first ready PositionalBase pointer, or nullptr if there is no match - */ - PositionalBase *GetNextPositional() - { - for (Base *child: children) - { - auto next = dynamic_cast(child); - auto group = dynamic_cast(child); - if (group) - { - next = group->GetNextPositional(); - } - if (next && next->Ready()) - { - return next; - } - } - return nullptr; - } - - /** Get whether this has any FlagBase children - * - * \return Whether or not there are any FlagBase children - */ - bool HasFlag() const - { - for (Base *child: children) - { - if (dynamic_cast(child)) - { - return true; - } - if (auto group = dynamic_cast(child)) - { - if (group->HasFlag()) - { - return true; - } - } - } - return false; - } - - /** Append a child to this Group. 
- */ - void Add(Base &child) - { - children.emplace_back(&child); - } - - /** Get all this group's children - */ - const std::vector &Children() const - { - return children; - } - - /** Count the number of matched children this group has - */ - std::vector::size_type MatchedChildren() const - { - return std::count_if(std::begin(children), std::end(children), [](const Base *child){return child->Matched();}); - } - - /** Whether or not this group matches validation - */ - virtual bool Matched() const noexcept override - { - return validator(*this); - } - - /** Get validation - */ - bool Get() const - { - return Matched(); - } - - /** Get all the child descriptions for help generation - */ - std::vector> GetChildDescriptions(const std::string &shortPrefix, const std::string &longPrefix, const std::string &shortSeparator, const std::string &longSeparator, const unsigned int indent = 0) const - { - std::vector> descriptions; - for (const auto &child: children) - { - if ((child->GetOptions() & Options::Hidden) != Options::None) - { - continue; - } - - if (const auto group = dynamic_cast(child)) - { - // Push that group description on the back if not empty - unsigned char addindent = 0; - if (!group->help.empty()) - { - descriptions.emplace_back(group->help, "", indent); - addindent = 1; - } - auto groupDescriptions = group->GetChildDescriptions(shortPrefix, longPrefix, shortSeparator, longSeparator, indent + addindent); - descriptions.insert( - std::end(descriptions), - std::make_move_iterator(std::begin(groupDescriptions)), - std::make_move_iterator(std::end(groupDescriptions))); - } else if (const auto named = dynamic_cast(child)) - { - const auto description = named->GetDescription(shortPrefix, longPrefix, shortSeparator, longSeparator); - descriptions.emplace_back(std::get<0>(description), std::get<1>(description), indent); - } - } - return descriptions; - } - - /** Get the names of positional parameters - */ - std::vector GetPosNames() const - { - std::vector names; 
- for (const auto &child: children) - { - if (const Group *group = dynamic_cast(child)) - { - auto groupNames = group->GetPosNames(); - names.insert( - std::end(names), - std::make_move_iterator(std::begin(groupNames)), - std::make_move_iterator(std::end(groupNames))); - } else if (const PositionalBase *pos = dynamic_cast(child)) - { - names.emplace_back(pos->Name()); - } - } - return names; - } - - virtual void Reset() noexcept override - { - for (auto &child: children) - { - child->Reset(); - } -#ifdef ARGS_NOEXCEPT - error = Error::None; -#endif - } - -#ifdef ARGS_NOEXCEPT - /// Only for ARGS_NOEXCEPT - virtual Error GetError() const override - { - if (error != Error::None) - { - return error; - } - - auto it = std::find_if(std::begin(children), std::end(children), [](const Base *child){return child->GetError() != Error::None;}); - if (it == std::end(children)) - { - return Error::None; - } else - { - return (*it)->GetError(); - } - } -#endif - - }; - - /** The main user facing command line argument parser class - */ - class ArgumentParser : public Group - { - private: - std::string prog; - std::string proglinePostfix; - std::string description; - std::string epilog; - - std::string longprefix; - std::string shortprefix; - - std::string longseparator; - - std::string terminator; - - bool allowJoinedShortValue; - bool allowJoinedLongValue; - bool allowSeparateShortValue; - bool allowSeparateLongValue; - - protected: - bool RaiseParseError(const std::string &message) - { -#ifdef ARGS_NOEXCEPT - (void)message; - error = Error::Parse; - return false; -#else - throw ParseError(message); -#endif - } - - enum class OptionType - { - LongFlag, - ShortFlag, - Positional - }; - - OptionType ParseOption(const std::string &s) - { - if (s.find(longprefix) == 0 && s.length() > longprefix.length()) - { - return OptionType::LongFlag; - } - - if (s.find(shortprefix) == 0 && s.length() > shortprefix.length()) - { - return OptionType::ShortFlag; - } - - return 
OptionType::Positional; - } - - /** (INTERNAL) Parse flag's values - * - * \param arg The string to display in error message as a flag name - * \param[in, out] it The iterator to first value. It will point to the last value - * \param end The end iterator - * \param joinedArg Joined value (e.g. bar in --foo=bar) - * \param canDiscardJoined If true joined value can be parsed as flag not as a value (as in -abcd) - * \param[out] values The vector to store parsed arg's values - */ - template - bool ParseArgsValues(FlagBase &flag, const std::string &arg, It &it, It end, - const bool allowSeparate, const bool allowJoined, - const bool hasJoined, const std::string &joinedArg, - const bool canDiscardJoined, std::vector &values) - { - values.clear(); - - Nargs nargs = flag.NumberOfArguments(); - - if (hasJoined && !allowJoined && nargs.min != 0) - { - return RaiseParseError("Flag '" + arg + "' was passed a joined argument, but these are disallowed"); - } - - if (hasJoined) - { - if (!canDiscardJoined || nargs.max != 0) - { - values.push_back(joinedArg); - } - } else if (!allowSeparate) - { - if (nargs.min != 0) - { - return RaiseParseError("Flag '" + arg + "' was passed a separate argument, but these are disallowed"); - } - } else - { - auto valueIt = it; - ++valueIt; - - while (valueIt != end && - values.size() < nargs.max && - (nargs.min == nargs.max || ParseOption(*valueIt) == OptionType::Positional)) - { - - values.push_back(*valueIt); - ++it; - ++valueIt; - } - } - - if (values.size() > nargs.max) - { - return RaiseParseError("Passed an argument into a non-argument flag: " + arg); - } else if (values.size() < nargs.min) - { - if (nargs.min == 1 && nargs.max == 1) - { - return RaiseParseError("Flag '" + arg + "' requires an argument but received none"); - } else if (nargs.min == 1) - { - return RaiseParseError("Flag '" + arg + "' requires at least one argument but received none"); - } else if (nargs.min != nargs.max) - { - return RaiseParseError("Flag '" + arg + "' 
requires at least " + std::to_string(nargs.min) + - " arguments but received " + std::to_string(values.size())); - } else - { - return RaiseParseError("Flag '" + arg + "' requires " + std::to_string(nargs.min) + - " arguments but received " + std::to_string(values.size())); - } - } - - return true; - } - - template - bool ParseLong(It &it, It end) - { - const auto &chunk = *it; - const auto argchunk = chunk.substr(longprefix.size()); - // Try to separate it, in case of a separator: - const auto separator = longseparator.empty() ? argchunk.npos : argchunk.find(longseparator); - // If the separator is in the argument, separate it. - const auto arg = (separator != argchunk.npos ? - std::string(argchunk, 0, separator) - : argchunk); - const auto joined = (separator != argchunk.npos ? - argchunk.substr(separator + longseparator.size()) - : std::string()); - - if (auto flag = Match(arg)) - { - std::vector values; - if (!ParseArgsValues(*flag, arg, it, end, allowSeparateLongValue, allowJoinedLongValue, - separator != argchunk.npos, joined, false, values)) - { - return false; - } - - flag->ParseValue(values); - - if (flag->KickOut()) - { - ++it; - return false; - } - } else - { - return RaiseParseError("Flag could not be matched: " + arg); - } - - return true; - } - - template - bool ParseShort(It &it, It end) - { - const auto &chunk = *it; - const auto argchunk = chunk.substr(shortprefix.size()); - for (auto argit = std::begin(argchunk); argit != std::end(argchunk); ++argit) - { - const auto arg = *argit; - - if (auto flag = Match(arg)) - { - const std::string value(argit + 1, std::end(argchunk)); - std::vector values; - if (!ParseArgsValues(*flag, std::string(1, arg), it, end, - allowSeparateShortValue, allowJoinedShortValue, - !value.empty(), value, !value.empty(), values)) - { - return false; - } - - flag->ParseValue(values); - - if (flag->KickOut()) - { - ++it; - return false; - } - - if (!values.empty()) - { - break; - } - } else - { - return RaiseParseError("Flag 
could not be matched: '" + std::string(1, arg) + "'"); - } - } - - return true; - } - - public: - /** A simple structure of parameters for easy user-modifyable help menus - */ - struct HelpParams - { - /** The width of the help menu - */ - unsigned int width = 80; - /** The indent of the program line - */ - unsigned int progindent = 2; - /** The indent of the program trailing lines for long parameters - */ - unsigned int progtailindent = 4; - /** The indent of the description and epilogs - */ - unsigned int descriptionindent = 4; - /** The indent of the flags - */ - unsigned int flagindent = 6; - /** The indent of the flag descriptions - */ - unsigned int helpindent = 40; - /** The additional indent each group adds - */ - unsigned int eachgroupindent = 2; - - /** The minimum gutter between each flag and its help - */ - unsigned int gutter = 1; - - /** Show the terminator when both options and positional parameters are present - */ - bool showTerminator = true; - - /** Show the {OPTIONS} on the prog line when this is true - */ - bool showProglineOptions = true; - - /** Show the positionals on the prog line when this is true - */ - bool showProglinePositionals = true; - } helpParams; - ArgumentParser(const std::string &description_, const std::string &epilog_ = std::string()) : - Group("", Group::Validators::AllChildGroups), - description(description_), - epilog(epilog_), - longprefix("--"), - shortprefix("-"), - longseparator("="), - terminator("--"), - allowJoinedShortValue(true), - allowJoinedLongValue(true), - allowSeparateShortValue(true), - allowSeparateLongValue(true) {} - - /** The program name for help generation - */ - const std::string &Prog() const - { return prog; } - /** The program name for help generation - */ - void Prog(const std::string &prog_) - { this->prog = prog_; } - - /** The description that appears on the prog line after options - */ - const std::string &ProglinePostfix() const - { return proglinePostfix; } - /** The description that 
appears on the prog line after options - */ - void ProglinePostfix(const std::string &proglinePostfix_) - { this->proglinePostfix = proglinePostfix_; } - - /** The description that appears above options - */ - const std::string &Description() const - { return description; } - /** The description that appears above options - */ - void Description(const std::string &description_) - { this->description = description_; } - - /** The description that appears below options - */ - const std::string &Epilog() const - { return epilog; } - /** The description that appears below options - */ - void Epilog(const std::string &epilog_) - { this->epilog = epilog_; } - - /** The prefix for long flags - */ - const std::string &LongPrefix() const - { return longprefix; } - /** The prefix for long flags - */ - void LongPrefix(const std::string &longprefix_) - { this->longprefix = longprefix_; } - - /** The prefix for short flags - */ - const std::string &ShortPrefix() const - { return shortprefix; } - /** The prefix for short flags - */ - void ShortPrefix(const std::string &shortprefix_) - { this->shortprefix = shortprefix_; } - - /** The separator for long flags - */ - const std::string &LongSeparator() const - { return longseparator; } - /** The separator for long flags - */ - void LongSeparator(const std::string &longseparator_) - { - if (longseparator_.empty()) - { -#ifdef ARGS_NOEXCEPT - error = Error::Usage; -#else - throw UsageError("longseparator can not be set to empty"); -#endif - } else - { - this->longseparator = longseparator_; - } - } - - /** The terminator that forcibly separates flags from positionals - */ - const std::string &Terminator() const - { return terminator; } - /** The terminator that forcibly separates flags from positionals - */ - void Terminator(const std::string &terminator_) - { this->terminator = terminator_; } - - /** Get the current argument separation parameters. - * - * See SetArgumentSeparations for details on what each one means. 
- */ - void GetArgumentSeparations( - bool &allowJoinedShortValue_, - bool &allowJoinedLongValue_, - bool &allowSeparateShortValue_, - bool &allowSeparateLongValue_) const - { - allowJoinedShortValue_ = this->allowJoinedShortValue; - allowJoinedLongValue_ = this->allowJoinedLongValue; - allowSeparateShortValue_ = this->allowSeparateShortValue; - allowSeparateLongValue_ = this->allowSeparateLongValue; - } - - /** Change allowed option separation. - * - * \param allowJoinedShortValue Allow a short flag that accepts an argument to be passed its argument immediately next to it (ie. in the same argv field) - * \param allowJoinedLongValue Allow a long flag that accepts an argument to be passed its argument separated by the longseparator (ie. in the same argv field) - * \param allowSeparateShortValue Allow a short flag that accepts an argument to be passed its argument separated by whitespace (ie. in the next argv field) - * \param allowSeparateLongValue Allow a long flag that accepts an argument to be passed its argument separated by whitespace (ie. 
in the next argv field) - */ - void SetArgumentSeparations( - const bool allowJoinedShortValue_, - const bool allowJoinedLongValue_, - const bool allowSeparateShortValue_, - const bool allowSeparateLongValue_) - { - this->allowJoinedShortValue = allowJoinedShortValue_; - this->allowJoinedLongValue = allowJoinedLongValue_; - this->allowSeparateShortValue = allowSeparateShortValue_; - this->allowSeparateLongValue = allowSeparateLongValue_; - } - - /** Pass the help menu into an ostream - */ - void Help(std::ostream &help_) const - { - bool hasoptions = false; - bool hasarguments = false; - - const auto description_text = Wrap(this->description, helpParams.width - helpParams.descriptionindent); - const auto epilog_text = Wrap(this->epilog, helpParams.width - helpParams.descriptionindent); - std::ostringstream prognameline; - prognameline << prog; - if (HasFlag()) - { - hasoptions = true; - if (helpParams.showProglineOptions) - { - prognameline << " {OPTIONS}"; - } - } - for (const std::string &posname: GetPosNames()) - { - hasarguments = true; - if (helpParams.showProglinePositionals) - { - prognameline << " [" << posname << ']'; - } - } - if (!proglinePostfix.empty()) - { - prognameline << ' ' << proglinePostfix; - } - const auto proglines = Wrap(prognameline.str(), helpParams.width - (helpParams.progindent + 4), helpParams.width - helpParams.progindent); - auto progit = std::begin(proglines); - if (progit != std::end(proglines)) - { - help_ << std::string(helpParams.progindent, ' ') << *progit << '\n'; - ++progit; - } - for (; progit != std::end(proglines); ++progit) - { - help_ << std::string(helpParams.progtailindent, ' ') << *progit << '\n'; - } - - help_ << '\n'; - - for (const auto &line: description_text) - { - help_ << std::string(helpParams.descriptionindent, ' ') << line << "\n"; - } - help_ << "\n"; - help_ << std::string(helpParams.progindent, ' ') << "OPTIONS:\n\n"; - for (const auto &desc: GetChildDescriptions(shortprefix, longprefix, 
allowJoinedShortValue ? "" : " ", allowJoinedLongValue ? longseparator : " ")) - { - const auto groupindent = std::get<2>(desc) * helpParams.eachgroupindent; - const auto flags = Wrap(std::get<0>(desc), helpParams.width - (helpParams.flagindent + helpParams.helpindent + helpParams.gutter)); - const auto info = Wrap(std::get<1>(desc), helpParams.width - (helpParams.helpindent + groupindent)); - - std::string::size_type flagssize = 0; - for (auto flagsit = std::begin(flags); flagsit != std::end(flags); ++flagsit) - { - if (flagsit != std::begin(flags)) - { - help_ << '\n'; - } - help_ << std::string(groupindent + helpParams.flagindent, ' ') << *flagsit; - flagssize = Glyphs(*flagsit); - } - - auto infoit = std::begin(info); - // groupindent is on both sides of this inequality, and therefore can be removed - if ((helpParams.flagindent + flagssize + helpParams.gutter) > helpParams.helpindent || infoit == std::end(info)) - { - help_ << '\n'; - } else - { - // groupindent is on both sides of the minus sign, and therefore doesn't actually need to be in here - help_ << std::string(helpParams.helpindent - (helpParams.flagindent + flagssize), ' ') << *infoit << '\n'; - ++infoit; - } - for (; infoit != std::end(info); ++infoit) - { - help_ << std::string(groupindent + helpParams.helpindent, ' ') << *infoit << '\n'; - } - } - if (hasoptions && hasarguments && helpParams.showTerminator) - { - for (const auto &item: Wrap(std::string("\"") + terminator + "\" can be used to terminate flag options and force all following arguments to be treated as positional options", helpParams.width - helpParams.flagindent)) - { - help_ << std::string(helpParams.flagindent, ' ') << item << '\n'; - } - } - - help_ << "\n"; - for (const auto &line: epilog_text) - { - help_ << std::string(helpParams.descriptionindent, ' ') << line << "\n"; - } - } - - /** Generate a help menu as a string. 
- * - * \return the help text as a single string - */ - std::string Help() const - { - std::ostringstream help_; - Help(help_); - return help_.str(); - } - - /** Parse all arguments. - * - * \param begin an iterator to the beginning of the argument list - * \param end an iterator to the past-the-end element of the argument list - * \return the iterator after the last parsed value. Only useful for kick-out - */ - template - It ParseArgs(It begin, It end) - { - // Reset all Matched statuses and errors - Reset(); - bool terminated = false; - - // Check all arg chunks - for (auto it = begin; it != end; ++it) - { - const auto &chunk = *it; - - if (!terminated && chunk == terminator) - { - terminated = true; - } else if (!terminated && ParseOption(chunk) == OptionType::LongFlag) - { - if (!ParseLong(it, end)) - { - return it; - } - } else if (!terminated && ParseOption(chunk) == OptionType::ShortFlag) - { - if (!ParseShort(it, end)) - { - return it; - } - } else - { - auto pos = GetNextPositional(); - if (pos) - { - pos->ParseValue(chunk); - - if (pos->KickOut()) - { - return ++it; - } - } else - { - RaiseParseError("Passed in argument, but no positional arguments were ready to receive it: " + chunk); - return it; - } - } - } - - for (Base *child: Children()) - { - child->Validate(shortprefix, longprefix); - } - - if (!Matched()) - { -#ifdef ARGS_NOEXCEPT - error = Error::Validation; -#else - std::ostringstream problem; - problem << "Group validation failed somewhere!"; - throw ValidationError(problem.str()); -#endif - } - - return end; - } - - /** Parse all arguments. - * - * \param args an iterable of the arguments - * \return the iterator after the last parsed value. 
Only useful for kick-out - */ - template - auto ParseArgs(const T &args) -> decltype(std::begin(args)) - { - return ParseArgs(std::begin(args), std::end(args)); - } - - /** Convenience function to parse the CLI from argc and argv - * - * Just assigns the program name and vectorizes arguments for passing into ParseArgs() - * - * \return whether or not all arguments were parsed. This works for detecting kick-out, but is generally useless as it can't do anything with it. - */ - bool ParseCLI(const int argc, const char * const * argv) - { - if (prog.empty()) - { - prog.assign(argv[0]); - } - const std::vector args(argv + 1, argv + argc); - return ParseArgs(args) == std::end(args); - } - }; - - inline std::ostream &operator<<(std::ostream &os, const ArgumentParser &parser) - { - parser.Help(os); - return os; - } - - /** Boolean argument matcher - */ - class Flag : public FlagBase - { - public: - Flag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, Options options_): FlagBase(name_, help_, std::move(matcher_), options_) - { - group_.Add(*this); - } - - Flag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const bool extraError_ = false): Flag(group_, name_, help_, std::move(matcher_), extraError_ ? 
Options::Single : Options::None) - { - } - - virtual ~Flag() {} - - /** Get whether this was matched - */ - bool Get() const - { - return Matched(); - } - - virtual Nargs NumberOfArguments() const noexcept override - { - return 0; - } - - virtual void ParseValue(const std::vector&) override - { - } - }; - - /** Help flag class - * - * Works like a regular flag, but throws an instance of Help when it is matched - */ - class HelpFlag : public Flag - { - public: - HelpFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_): Flag(group_, name_, help_, std::move(matcher_)) {} - - virtual ~HelpFlag() {} - - virtual FlagBase *Match(const std::string &arg) override - { - if (FlagBase::Match(arg)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Help; - return this; -#else - throw Help(arg); -#endif - } - return nullptr; - } - - virtual FlagBase *Match(const char arg) override - { - if (FlagBase::Match(arg)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Help; - return this; -#else - throw Help(std::string(1, arg)); -#endif - } - return nullptr; - } - - /** Get whether this was matched - */ - bool Get() const noexcept - { - return Matched(); - } - }; - - /** A flag class that simply counts the number of times it's matched - */ - class CounterFlag : public Flag - { - private: - const int startcount; - int count; - - public: - CounterFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const int startcount_ = 0): Flag(group_, name_, help_, std::move(matcher_)), startcount(startcount_), count(startcount_) {} - - virtual ~CounterFlag() {} - - virtual FlagBase *Match(const std::string &arg) override - { - auto me = FlagBase::Match(arg); - if (me) - { - ++count; - } - return me; - } - - virtual FlagBase *Match(const char arg) override - { - auto me = FlagBase::Match(arg); - if (me) - { - ++count; - } - return me; - } - - /** Get the count - */ - int &Get() noexcept - { - return count; - } - - virtual void Reset() 
noexcept override - { - FlagBase::Reset(); - count = startcount; - } - }; - - /** A default Reader class for argument classes - * - * Simply uses a std::istringstream to read into the destination type, and - * raises a ParseError if there are any characters left. - */ - template - struct ValueReader - { - bool operator ()(const std::string &name, const std::string &value, T &destination) - { - std::istringstream ss(value); - ss >> destination; - - if (ss.rdbuf()->in_avail() > 0) - { -#ifdef ARGS_NOEXCEPT - return false; -#else - std::ostringstream problem; - problem << "Argument '" << name << "' received invalid value type '" << value << "'"; - throw ParseError(problem.str()); -#endif - } - return true; - } - }; - - /** std::string specialization for ValueReader - * - * By default, stream extraction into a string splits on white spaces, and - * it is more efficient to ust copy a string into the destination. - */ - template <> - struct ValueReader - { - bool operator()(const std::string &, const std::string &value, std::string &destination) - { - destination.assign(value); - return true; - } - }; - - /** An argument-accepting flag class - * - * \tparam T the type to extract the argument as - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - typename Reader = ValueReader> - class ValueFlag : public ValueFlagBase - { - protected: - T value; - - private: - Reader reader; - - public: - - ValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const T &defaultValue_, Options options_): ValueFlagBase(name_, help_, std::move(matcher_), options_), value(defaultValue_) - { - group_.Add(*this); - } - - ValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const T &defaultValue_ = T(), const bool extraError_ = false): 
ValueFlag(group_, name_, help_, std::move(matcher_), defaultValue_, extraError_ ? Options::Single : Options::None) - { - } - - ValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, Options options_): ValueFlag(group_, name_, help_, std::move(matcher_), T(), options_) - { - } - - virtual ~ValueFlag() {} - - virtual void ParseValue(const std::vector &values_) override - { - const std::string &value_ = values_.at(0); - -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, this->value)) - { - error = Error::Parse; - } -#else - reader(name, value_, this->value); -#endif - } - - /** Get the value - */ - T &Get() noexcept - { - return value; - } - }; - - /** An optional argument-accepting flag class - * - * \tparam T the type to extract the argument as - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - typename Reader = ValueReader> - class ImplicitValueFlag : public ValueFlag - { - protected: - - T implicitValue; - T defaultValue; - - public: - - ImplicitValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const T &implicitValue_, const T &defaultValue_ = T(), Options options_ = {}) - : ValueFlag(group_, name_, help_, std::move(matcher_), defaultValue_, options_), implicitValue(implicitValue_), defaultValue(defaultValue_) - { - } - - ImplicitValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const T &defaultValue_ = T(), Options options_ = {}) - : ValueFlag(group_, name_, help_, std::move(matcher_), defaultValue_, options_), implicitValue(defaultValue_), defaultValue(defaultValue_) - { - } - - ImplicitValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, Options options_) - : ValueFlag(group_, name_, help_, std::move(matcher_), {}, 
options_), implicitValue(), defaultValue() - { - } - - virtual ~ImplicitValueFlag() {} - - virtual Nargs NumberOfArguments() const noexcept override - { - return {0, 1}; - } - - virtual void ParseValue(const std::vector &value_) override - { - if (value_.empty()) - { - this->value = implicitValue; - } else - { - ValueFlag::ParseValue(value_); - } - } - - virtual void Reset() noexcept override - { - this->value = defaultValue; - ValueFlag::Reset(); - } - }; - - /** A variadic arguments accepting flag class - * - * \tparam T the type to extract the argument as - * \tparam List the list type that houses the values - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - template class List = std::vector, - typename Reader = ValueReader> - class NargsValueFlag : public FlagBase - { - protected: - - List values; - Nargs nargs; - Reader reader; - - public: - - typedef List Container; - typedef T value_type; - typedef typename Container::allocator_type allocator_type; - typedef typename Container::pointer pointer; - typedef typename Container::const_pointer const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef typename Container::size_type size_type; - typedef typename Container::difference_type difference_type; - typedef typename Container::iterator iterator; - typedef typename Container::const_iterator const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - - NargsValueFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, Nargs nargs_, const List &defaultValues_ = {}, Options options_ = {}) - : FlagBase(name_, help_, std::move(matcher_), options_), values(defaultValues_), nargs(nargs_) - { - group_.Add(*this); - } - - virtual ~NargsValueFlag() {} - - virtual Nargs 
NumberOfArguments() const noexcept override - { - return nargs; - } - - virtual void ParseValue(const std::vector &values_) override - { - values.clear(); - - for (const std::string &value : values_) - { - T v; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value, v)) - { - error = Error::Parse; - } -#else - reader(name, value, v); -#endif - values.insert(std::end(values), v); - } - } - - List &Get() noexcept - { - return values; - } - - iterator begin() noexcept - { - return values.begin(); - } - - const_iterator begin() const noexcept - { - return values.begin(); - } - - const_iterator cbegin() const noexcept - { - return values.cbegin(); - } - - iterator end() noexcept - { - return values.end(); - } - - const_iterator end() const noexcept - { - return values.end(); - } - - const_iterator cend() const noexcept - { - return values.cend(); - } - }; - - /** An argument-accepting flag class that pushes the found values into a list - * - * \tparam T the type to extract the argument as - * \tparam List the list type that houses the values - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - template class List = std::vector, - typename Reader = ValueReader> - class ValueFlagList : public ValueFlagBase - { - private: - using Container = List; - Container values; - Reader reader; - - public: - - typedef T value_type; - typedef typename Container::allocator_type allocator_type; - typedef typename Container::pointer pointer; - typedef typename Container::const_pointer const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef typename Container::size_type size_type; - typedef typename Container::difference_type difference_type; - typedef typename Container::iterator iterator; - typedef typename Container::const_iterator const_iterator; - typedef std::reverse_iterator reverse_iterator; - 
typedef std::reverse_iterator const_reverse_iterator; - - ValueFlagList(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const Container &defaultValues_ = Container()): ValueFlagBase(name_, help_, std::move(matcher_)), values(defaultValues_) - { - group_.Add(*this); - } - - virtual ~ValueFlagList() {} - - virtual void ParseValue(const std::vector &values_) override - { - const std::string &value_ = values_.at(0); - - T v; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, v)) - { - error = Error::Parse; - } -#else - reader(name, value_, v); -#endif - values.insert(std::end(values), v); - } - - /** Get the values - */ - Container &Get() noexcept - { - return values; - } - - virtual std::string Name() const override - { - return name + std::string("..."); - } - - virtual void Reset() noexcept override - { - ValueFlagBase::Reset(); - values.clear(); - } - - iterator begin() noexcept - { - return values.begin(); - } - - const_iterator begin() const noexcept - { - return values.begin(); - } - - const_iterator cbegin() const noexcept - { - return values.cbegin(); - } - - iterator end() noexcept - { - return values.end(); - } - - const_iterator end() const noexcept - { - return values.end(); - } - - const_iterator cend() const noexcept - { - return values.cend(); - } - }; - - /** A mapping value flag class - * - * \tparam K the type to extract the argument as - * \tparam T the type to store the result as - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - * \tparam Map The Map type. 
Should operate like std::map or std::unordered_map - */ - template < - typename K, - typename T, - typename Reader = ValueReader, - template class Map = std::unordered_map> - class MapFlag : public ValueFlagBase - { - private: - const Map map; - T value; - Reader reader; - - public: - - MapFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const Map &map_, const T &defaultValue_, Options options_): ValueFlagBase(name_, help_, std::move(matcher_), options_), map(map_), value(defaultValue_) - { - group_.Add(*this); - } - - MapFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const Map &map_, const T &defaultValue_ = T(), const bool extraError_ = false): MapFlag(group_, name_, help_, std::move(matcher_), map_, defaultValue_, extraError_ ? Options::Single : Options::None) - { - } - - MapFlag(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const Map &map_, Options options_): MapFlag(group_, name_, help_, std::move(matcher_), map_, T(), options_) - { - } - - virtual ~MapFlag() {} - - virtual void ParseValue(const std::vector &values_) override - { - const std::string &value_ = values_.at(0); - - K key; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, key)) - { - error = Error::Parse; - } -#else - reader(name, value_, key); -#endif - auto it = map.find(key); - if (it == std::end(map)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Map; -#else - std::ostringstream problem; - problem << "Could not find key '" << key << "' in map for arg '" << name << "'"; - throw MapError(problem.str()); -#endif - } else - { - this->value = it->second; - } - } - - /** Get the value - */ - T &Get() noexcept - { - return value; - } - }; - - /** A mapping value flag list class - * - * \tparam K the type to extract the argument as - * \tparam T the type to store the result as - * \tparam List the list type that houses the values - * \tparam Reader The functor type used to read 
the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - * \tparam Map The Map type. Should operate like std::map or std::unordered_map - */ - template < - typename K, - typename T, - template class List = std::vector, - typename Reader = ValueReader, - template class Map = std::unordered_map> - class MapFlagList : public ValueFlagBase - { - private: - using Container = List; - const Map map; - Container values; - Reader reader; - - public: - typedef T value_type; - typedef typename Container::allocator_type allocator_type; - typedef typename Container::pointer pointer; - typedef typename Container::const_pointer const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef typename Container::size_type size_type; - typedef typename Container::difference_type difference_type; - typedef typename Container::iterator iterator; - typedef typename Container::const_iterator const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - - MapFlagList(Group &group_, const std::string &name_, const std::string &help_, Matcher &&matcher_, const Map &map_, const Container &defaultValues_ = Container()): ValueFlagBase(name_, help_, std::move(matcher_)), map(map_), values(defaultValues_) - { - group_.Add(*this); - } - - virtual ~MapFlagList() {} - - virtual void ParseValue(const std::vector &values_) override - { - const std::string &value = values_.at(0); - - K key; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value, key)) - { - error = Error::Parse; - } -#else - reader(name, value, key); -#endif - auto it = map.find(key); - if (it == std::end(map)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Map; -#else - std::ostringstream problem; - problem << "Could not find key '" << key << "' in map for arg '" << name << "'"; - throw MapError(problem.str()); -#endif - } else - { - this->values.emplace_back(it->second); - } - } - - 
/** Get the value - */ - Container &Get() noexcept - { - return values; - } - - virtual std::string Name() const override - { - return name + std::string("..."); - } - - virtual void Reset() noexcept override - { - ValueFlagBase::Reset(); - values.clear(); - } - - iterator begin() noexcept - { - return values.begin(); - } - - const_iterator begin() const noexcept - { - return values.begin(); - } - - const_iterator cbegin() const noexcept - { - return values.cbegin(); - } - - iterator end() noexcept - { - return values.end(); - } - - const_iterator end() const noexcept - { - return values.end(); - } - - const_iterator cend() const noexcept - { - return values.cend(); - } - }; - - /** A positional argument class - * - * \tparam T the type to extract the argument as - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - typename Reader = ValueReader> - class Positional : public PositionalBase - { - private: - T value; - Reader reader; - public: - Positional(Group &group_, const std::string &name_, const std::string &help_, const T &defaultValue_ = T(), Options options_ = Options::None): PositionalBase(name_, help_, options_), value(defaultValue_) - { - group_.Add(*this); - } - - Positional(Group &group_, const std::string &name_, const std::string &help_, Options options_): Positional(group_, name_, help_, T(), options_) - { - } - - virtual ~Positional() {} - - virtual void ParseValue(const std::string &value_) override - { -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, this->value)) - { - error = Error::Parse; - } -#else - reader(name, value_, this->value); -#endif - ready = false; - matched = true; - } - - /** Get the value - */ - T &Get() noexcept - { - return value; - } - }; - - /** A positional argument class that pushes the found values into a list - * - * \tparam T the type to extract the argument as 
- * \tparam List the list type that houses the values - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - */ - template < - typename T, - template class List = std::vector, - typename Reader = ValueReader> - class PositionalList : public PositionalBase - { - private: - using Container = List; - Container values; - Reader reader; - - public: - typedef T value_type; - typedef typename Container::allocator_type allocator_type; - typedef typename Container::pointer pointer; - typedef typename Container::const_pointer const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef typename Container::size_type size_type; - typedef typename Container::difference_type difference_type; - typedef typename Container::iterator iterator; - typedef typename Container::const_iterator const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - - PositionalList(Group &group_, const std::string &name_, const std::string &help_, const Container &defaultValues_ = Container()): PositionalBase(name_, help_), values(defaultValues_) - { - group_.Add(*this); - } - - virtual ~PositionalList() {} - - virtual void ParseValue(const std::string &value_) override - { - T v; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, v)) - { - error = Error::Parse; - } -#else - reader(name, value_, v); -#endif - values.insert(std::end(values), v); - matched = true; - } - - virtual std::string Name() const override - { - return name + std::string("..."); - } - - /** Get the values - */ - Container &Get() noexcept - { - return values; - } - - virtual void Reset() noexcept override - { - PositionalBase::Reset(); - values.clear(); - } - - iterator begin() noexcept - { - return values.begin(); - } - - const_iterator begin() const noexcept - { - return values.begin(); - } - - const_iterator cbegin() 
const noexcept - { - return values.cbegin(); - } - - iterator end() noexcept - { - return values.end(); - } - - const_iterator end() const noexcept - { - return values.end(); - } - - const_iterator cend() const noexcept - { - return values.cend(); - } - }; - - /** A positional argument mapping class - * - * \tparam K the type to extract the argument as - * \tparam T the type to store the result as - * \tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - * \tparam Map The Map type. Should operate like std::map or std::unordered_map - */ - template < - typename K, - typename T, - typename Reader = ValueReader, - template class Map = std::unordered_map> - class MapPositional : public PositionalBase - { - private: - const Map map; - T value; - Reader reader; - - public: - - MapPositional(Group &group_, const std::string &name_, const std::string &help_, const Map &map_, const T &defaultValue_ = T()): PositionalBase(name_, help_), map(map_), value(defaultValue_) - { - group_.Add(*this); - } - - virtual ~MapPositional() {} - - virtual void ParseValue(const std::string &value_) override - { - K key; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, key)) - { - error = Error::Parse; - } -#else - reader(name, value_, key); -#endif - auto it = map.find(key); - if (it == std::end(map)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Map; -#else - std::ostringstream problem; - problem << "Could not find key '" << key << "' in map for arg '" << name << "'"; - throw MapError(problem.str()); -#endif - } else - { - this->value = it->second; - ready = false; - matched = true; - } - } - - /** Get the value - */ - T &Get() noexcept - { - return value; - } - }; - - /** A positional argument mapping list class - * - * \tparam K the type to extract the argument as - * \tparam T the type to store the result as - * \tparam List the list type that houses the values - * 
\tparam Reader The functor type used to read the argument, taking the name, value, and destination reference with operator(), and returning a bool (if ARGS_NOEXCEPT is defined) - * \tparam Map The Map type. Should operate like std::map or std::unordered_map - */ - template < - typename K, - typename T, - template class List = std::vector, - typename Reader = ValueReader, - template class Map = std::unordered_map> - class MapPositionalList : public PositionalBase - { - private: - using Container = List; - - const Map map; - Container values; - Reader reader; - - public: - typedef T value_type; - typedef typename Container::allocator_type allocator_type; - typedef typename Container::pointer pointer; - typedef typename Container::const_pointer const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef typename Container::size_type size_type; - typedef typename Container::difference_type difference_type; - typedef typename Container::iterator iterator; - typedef typename Container::const_iterator const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - - MapPositionalList(Group &group_, const std::string &name_, const std::string &help_, const Map &map_, const Container &defaultValues_ = Container()): PositionalBase(name_, help_), map(map_), values(defaultValues_) - { - group_.Add(*this); - } - - virtual ~MapPositionalList() {} - - virtual void ParseValue(const std::string &value_) override - { - K key; -#ifdef ARGS_NOEXCEPT - if (!reader(name, value_, key)) - { - error = Error::Parse; - } -#else - reader(name, value_, key); -#endif - auto it = map.find(key); - if (it == std::end(map)) - { -#ifdef ARGS_NOEXCEPT - error = Error::Map; -#else - std::ostringstream problem; - problem << "Could not find key '" << key << "' in map for arg '" << name << "'"; - throw MapError(problem.str()); -#endif - } else - { - this->values.emplace_back(it->second); - matched = true; - } - } - - /** 
Get the value - */ - Container &Get() noexcept - { - return values; - } - - virtual std::string Name() const override - { - return name + std::string("..."); - } - - virtual void Reset() noexcept override - { - PositionalBase::Reset(); - values.clear(); - } - - iterator begin() noexcept - { - return values.begin(); - } - - const_iterator begin() const noexcept - { - return values.begin(); - } - - const_iterator cbegin() const noexcept - { - return values.cbegin(); - } - - iterator end() noexcept - { - return values.end(); - } - - const_iterator end() const noexcept - { - return values.end(); - } - - const_iterator cend() const noexcept - { - return values.cend(); - } - }; -} - -#endif diff --git a/tools/avx-turbo/asm-methods.asm b/tools/avx-turbo/asm-methods.asm deleted file mode 100644 index 9e55a8d..0000000 --- a/tools/avx-turbo/asm-methods.asm +++ /dev/null @@ -1,224 +0,0 @@ -BITS 64 -default rel - -%if (__NASM_MAJOR__ < 2) || (__NASM_MINOR__ < 11) -%deftok ver __NASM_VER__ -%error Your nasm version (ver) is too old, you need at least 2.11 to compile this -%endif - -%include "nasm-utils-inc.asm" - -nasm_util_assert_boilerplate -thunk_boilerplate - -; aligns and declares the global label for the bench with the given name -; also potentally checks the ABI compliance (if enabled) -%macro define_func 1 -abi_checked_function %1 -%endmacro - -; define a test func that unrolls the loop by 100 -; with the given body instruction -; %1 - function name -; %2 - init instruction (e.g., xor out the variable you'll add to) -; %3 - loop body instruction -; %4 - repeat count, defaults to 100 - values other than 100 mean the Mops value will be wrong -%macro test_func 3-4 100 -define_func %1 -%2 -.top: -times %4 %3 -sub rdi, 100 -jnz .top -ret -%endmacro - -; pause -test_func pause_only, {}, {pause}, 1 - -; vpermw latency -test_func avx512_vpermw, {vpcmpeqd ymm0, ymm0, ymm0}, {vpermw zmm0, zmm0, zmm0} - -; vpermb latency -test_func avx512_vpermd, {vpcmpeqd ymm0, ymm0, ymm0}, 
{vpermd zmm0, zmm0, zmm0} - -; imul latency -test_func avx128_imul, {vpcmpeqd xmm0, xmm0, xmm0}, {vpmuldq xmm0, xmm0, xmm0} -test_func avx256_imul, {vpcmpeqd ymm0, ymm0, ymm0}, {vpmuldq ymm0, ymm0, ymm0} -test_func avx512_imul, {vpcmpeqd ymm0, ymm0, ymm0}, {vpmuldq zmm0, zmm0, zmm0} - -; imul throughput -test_func avx128_imul_t, {vpcmpeqd xmm0, xmm0, xmm0}, {vpmuldq xmm0, xmm1, xmm1} -test_func avx256_imul_t, {vpcmpeqd ymm0, ymm0, ymm0}, {vpmuldq ymm0, ymm1, ymm1} -test_func avx512_imul_t, {vpcmpeqd ymm0, ymm0, ymm0}, {vpmuldq zmm0, zmm1, zmm1} - -; iadd latency -test_func scalar_iadd, {xor eax, eax}, {add rax, rax} - -test_func avx128_iadd, {vpcmpeqd xmm0, xmm0, xmm0}, {vpaddq xmm0, xmm0, xmm0} -test_func avx256_iadd, {vpcmpeqd ymm0, ymm0, ymm0}, {vpaddq ymm0, ymm0, ymm0} -test_func avx512_iadd, {vpcmpeqd ymm0, ymm0, ymm0}, {vpaddq zmm0, zmm0, zmm0} - -; iadd latency with zmm16 -test_func avx128_iadd16, {vpternlogd xmm16, xmm16, xmm16, 0xff}, {vpaddq xmm16, xmm16, xmm16} -test_func avx256_iadd16, {vpternlogd ymm16, ymm16, ymm16, 0xff}, {vpaddq ymm16, ymm16, ymm16} -test_func avx512_iadd16, {vpternlogd zmm16, zmm16, zmm16, 0xff}, {vpaddq zmm16, zmm16, zmm16} - -; iadd throughput -test_func avx128_iadd_t, {vpcmpeqd xmm1, xmm0, xmm0}, {vpaddq xmm0, xmm1, xmm1} -test_func avx256_iadd_t, {vpcmpeqd ymm1, ymm0, ymm0}, {vpaddq ymm0, ymm1, ymm1} - -; zeroing xor -test_func avx128_xor_zero, {}, {vpxor xmm0, xmm0, xmm0} -test_func avx256_xor_zero, {}, {vpxor ymm0, ymm0, ymm0} -test_func avx512_xor_zero, {}, {vpxord zmm0, zmm0, zmm0} - -; vpsrlvd latency -test_func avx128_vshift, {vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd xmm0, xmm0, xmm0} -test_func avx256_vshift, {vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd ymm0, ymm0, ymm0} -test_func avx512_vshift, {vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd zmm0, zmm0, zmm0} - -; vpsrlvd throughput -test_func avx128_vshift_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd xmm0, xmm1, xmm1} -test_func avx256_vshift_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd ymm0, ymm1, 
ymm1} -test_func avx512_vshift_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vpsrlvd zmm0, zmm1, zmm1} - -; vplzcntd latency -test_func avx128_vlzcnt, {vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd xmm0, xmm0} -test_func avx256_vlzcnt, {vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd ymm0, ymm0} -test_func avx512_vlzcnt, {vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd zmm0, zmm0} - -; vplzcntd throughput -test_func avx128_vlzcnt_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd xmm0, xmm1} -test_func avx256_vlzcnt_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd ymm0, ymm1} -test_func avx512_vlzcnt_t,{vpcmpeqd xmm1, xmm0, xmm0}, {vplzcntd zmm0, zmm1} - -; FMA -test_func avx128_fma , {vpxor xmm0, xmm0, xmm0}, {vfmadd132pd xmm0, xmm0, xmm0} -test_func avx256_fma , {vpxor xmm0, xmm0, xmm0}, {vfmadd132pd ymm0, ymm0, ymm0} -test_func avx512_fma , {vpxor xmm0, xmm0, xmm0}, {vfmadd132pd zmm0, zmm0, zmm0} - -; this is like test_func, but it uses 10 parallel chains of instructions, -; unrolled 10 times, so (probably) max throughput at least if latency * throughput -; product for the instruction <= 10 -; %1 - function name -; %2 - init instruction (e.g., xor out the variable you'll add to) -; %3 - register base like xmm, ymm, zmm -; %4 - loop body instruction only (no operands) -; %5 - init value for xmm0-9, used as first (dest) arg as in vfmadd132pd xmm0..9, xmm10, xmm11 -; %6 - init value for xmm10, used as second arg as in vfmadd132pd reg, xmm10, xmm11 -; %7 - init value for xmm11, used as third arg as in vfmadd132pd reg, xmm10, xmm11 -%macro test_func_tput 7 -define_func %1 - -; init reg 0-9 -%assign r 0 -%rep 10 -%2 %3 %+ r, %5 -%assign r (r+1) -%endrep - -; init reg10, reg11 -%2 %3 %+ 10, %6 -%2 %3 %+ 11, %7 - -.top: -%rep 10 -%assign r 0 -%rep 10 -%4 %3 %+ r, %3 %+ 10, %3 %+ 11 -%assign r (r+1) -%endrep -%endrep -sub rdi, 100 -jnz .top -ret -%endmacro - -test_func_tput avx128_fma_t , vmovddup, xmm, vfmadd132pd, [zero_dp], [one_dp], [half_dp] -test_func_tput avx256_fma_t , vbroadcastsd, ymm, vfmadd132pd, [zero_dp], [one_dp], 
[half_dp] -test_func_tput avx512_fma_t , vbroadcastsd, zmm, vfmadd132pd, [zero_dp], [one_dp], [half_dp] -test_func_tput avx512_vpermw_t ,vbroadcastsd, zmm, vpermw, [zero_dp], [one_dp], [half_dp] -test_func_tput avx512_vpermd_t ,vbroadcastsd, zmm, vpermd, [zero_dp], [one_dp], [half_dp] - -; this is like test_func except that the 100x unrolled loop instruction is -; always a serial scalar add, while the passed instruction to test is only -; executed once per loop (so at a ratio of 1:100 for the scalar adds). This -; test the effect of an "occasional" AVX instruction. -; %1 - function name -; %2 - init instruction (e.g., xor out the variable you'll add to) -; %3 - loop body instruction -%macro test_func_sparse 4 -define_func %1 -%2 -%4 -xor eax, eax -.top: -%3 -times 100 add eax, eax -sub rdi, 100 -jnz .top -ret -%endmacro - -test_func_sparse avx128_mov_sparse, {vbroadcastsd ymm0, [one_dp]}, {vmovdqa xmm0, xmm0}, {} -test_func_sparse avx256_mov_sparse, {vbroadcastsd ymm0, [one_dp]}, {vmovdqa ymm0, ymm0}, {} -test_func_sparse avx512_mov_sparse, {vbroadcastsd zmm0, [one_dp]}, {vmovdqa32 zmm0, zmm0}, {} -test_func_sparse avx128_merge_sparse, {vbroadcastsd ymm0, [one_dp]}, {vmovdqa32 xmm0{k1}, xmm0}, {kmovw k1, [kmask]} -test_func_sparse avx256_merge_sparse, {vbroadcastsd ymm0, [one_dp]}, {vmovdqa32 ymm0{k1}, ymm0}, {kmovw k1, [kmask]} -test_func_sparse avx512_merge_sparse, {vbroadcastsd zmm0, [one_dp]}, {vmovdqa32 zmm0{k1}, zmm0}, {kmovw k1, [kmask]} - -test_func_sparse avx128_fma_sparse, {vbroadcastsd ymm0, [zero_dp]}, {vfmadd132pd xmm0, xmm0, xmm0 }, {} -test_func_sparse avx256_fma_sparse, {vbroadcastsd ymm0, [zero_dp]}, {vfmadd132pd ymm0, ymm0, ymm0 }, {} -test_func_sparse avx512_fma_sparse, {vbroadcastsd zmm0, [zero_dp]}, {vfmadd132pd zmm0, zmm0, zmm0 }, {} - -; %1 function name suffix -; %2 dirty instruction -%macro define_ucomis 2 -define_func ucomis_%1 -;vpxor xmm15, xmm15, xmm15 -;vzeroupper -%2 -movdqu xmm0, [one_dp] -movdqu xmm2, [one_dp] -movdqu xmm1, 
[zero_dp] -align 64 -.top: -%rep 100 -addsd xmm0, xmm2 -ucomisd xmm1, xmm0 -ja .never -%endrep -sub rdi, 100 -jnz .top -ret -.never: -ud2 -%endmacro - -define_ucomis clean, {vzeroupper} -define_ucomis dirty, {} - - -define_func dirty_it -vzeroupper -vpxord zmm15, zmm14, zmm15 -ret - -define_func dirty_it16 -vzeroupper -vpxord zmm16, zmm14, zmm15 -ret - -GLOBAL zeroupper_asm:function -zeroupper_asm: -vzeroupper -ret - -zero_dp: dq 0.0 -half_dp: dq 0.5 -one_dp: dq 1.0 -kmask: dq 0x5555555555555555 - - - diff --git a/tools/avx-turbo/atomic.h b/tools/avx-turbo/atomic.h deleted file mode 100644 index ea5704f..0000000 --- a/tools/avx-turbo/atomic.h +++ /dev/null @@ -1,409 +0,0 @@ -/* Atomic operations (v1) - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * This is a small abstraction layer for some common atomic operations - * (load, store, add, subtract, and compare & swap) implemented using - * various compiler-specific builtins. - * - * There are four types, 32-bit and 64-bit integers which are both - * atomic and non-atomic. The atomic versions should be used for the - * atomic variable, the non-atomic variables should be used to store - * values read from or written to an atomic variable. 
For example, a - * basic CAS loop: - * - * void square_dest(psnip_atomic_int64* value) { - * psnip_int64_t expected; - * do { - * expected = psnip_atomic_int64_load(&value); - * } while (!psnip_atomic_int64_compare_exchange(&value, &expected, expected * expected)); - * } - * - * Most things are implemented with the preprocessor, but if they were - * functions the prototypes (the 64-bit versions, just s/64/32/ for - * the 32-bit versions) would loo like: - * - * psnip_int64_t psnip_atomic_int64_load( - * psnip_atomic_int64* object); - * void psnip_atomic_int64_store( - * psnip_atomic_int64* object, - * psnip_int64_t desired); - * _Bool psnip_atomic_int64_compare_exchange( - * psnip_atomic_int64* object, - * psnip_int64_t* expected, - * psnip_int64_t desired); - * psnip_int64_t psnip_atomic_int64_add( - * psnip_atomic_int64* object, - * psnip_int64_t operand); - * psnip_int64_t psnip_atomic_int64_sub( - * psnip_atomic_int64* object, - * psnip_int64_t operand); - */ - -#if !defined(PSNIP_ATOMIC_H) -#define PSNIP_ATOMIC_H - -#if !defined(psnip_int64_t) || !defined(psnip_int32_t) -# include "exact-int.h" -#endif - -#if !defined(PSNIP_ATOMIC_STATIC_INLINE) -# if defined(__GNUC__) -# define PSNIP_ATOMIC__COMPILER_ATTRIBUTES __attribute__((__unused__)) -# else -# define PSNIP_ATOMIC__COMPILER_ATTRIBUTES -# endif - -# if defined(HEDLEY_INLINE) -# define PSNIP_ATOMIC__INLINE HEDLEY_INLINE -# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -# define PSNIP_ATOMIC__INLINE inline -# elif defined(__GNUC_STDC_INLINE__) -# define PSNIP_ATOMIC__INLINE __inline__ -# elif defined(_MSC_VER) && _MSC_VER >= 1200 -# define PSNIP_ATOMIC__INLINE __inline -# else -# define PSNIP_ATOMIC__INLINE -# endif - -# define PSNIP_ATOMIC__FUNCTION PSNIP_ATOMIC__COMPILER_ATTRIBUTES static PSNIP_ATOMIC__INLINE -#endif - -#if defined(__has_feature) -# define PSNIP_ATOMIC_HAS_FEATURE(feature) __has_feature(feature) -#else -# define PSNIP_ATOMIC_HAS_FEATURE(feature) 0 -#endif - -#define 
PSNIP_ATOMIC_IMPL_NONE 0 -#define PSNIP_ATOMIC_IMPL_GCC 1 -#define PSNIP_ATOMIC_IMPL_GCC_SYNC 2 -#define PSNIP_ATOMIC_IMPL_CLANG 3 -#define PSNIP_ATOMIC_IMPL_MS 4 -#define PSNIP_ATOMIC_IMPL_OPENMP 5 -#define PSNIP_ATOMIC_IMPL_C11 11 - -#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_GCC -#elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) -/* GCC 4.7 and 4.8 sets __STDC_VERSION__ to C11 (if compiling in C11 - * mode) and didn't have stdatomic.h, but failed to set - * __STDC_NO_ATOMICS__. Verions prior to 4.7 didn't set - * __STDC_VERSION__ to C11. */ -# if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_GCC -# else -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_C11 -# endif -#elif defined(_MSC_VER) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_MS -#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_GCC -#elif PSNIP_ATOMIC_HAS_FEATURE(c_atomic) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_CLANG -#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_GCC_SYNC -#elif (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x5140)) || (defined(__SUNPRO_CC) && (__SUNPRO_CC >= 0x5140)) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_GCC -#elif defined(_OPENMP) -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_OPENMP -#else -# define PSNIP_ATOMIC_NOT_FOUND -# define PSNIP_ATOMIC_IMPL PSNIP_ATOMIC_IMPL_NONE -# warning No atomic implementation found -#endif - -#if !defined(PSNIP_ATOMIC_NOT_FOUND) - -#if PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_C11 - -#include -typedef atomic_int_fast64_t psnip_atomic_int64; -typedef atomic_int_fast32_t psnip_atomic_int32; - -#define PSNIP_ATOMIC_VAR_INIT(value) ATOMIC_VAR_INIT(value) - -#define 
psnip_atomic_int64_load(object) \ - atomic_load(object) -#define psnip_atomic_int64_store(object, desired) \ - atomic_store(object, desired) -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - atomic_compare_exchange_strong(object, expected, desired) -#define psnip_atomic_int64_add(object, operand) \ - atomic_fetch_add(object, operand) -#define psnip_atomic_int64_sub(object, operand) \ - atomic_fetch_sub(object, operand) -#define psnip_atomic_fence() \ - atomic_thread_fence(memory_order_seq_cst) - -#define PSNIP_ATOMIC_IS_TG - -#elif PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_CLANG - -#include -typedef _Atomic psnip_int64_t psnip_atomic_int64; -typedef _Atomic psnip_int32_t psnip_atomic_int32; - -#define psnip_atomic_int64_load(object) \ - __c11_atomic_load(object, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_store(object, desired) \ - __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_add(object, operand) \ - __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_sub(object, operand) \ - __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST) -#define psnip_atomic_fence() \ - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST) - -#define PSNIP_ATOMIC_IS_TG - -#elif PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_GCC - -#include -#if !defined(__INTEL_COMPILER) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) && !defined(_OPENMP) -typedef _Atomic psnip_int64_t psnip_atomic_int64; -typedef _Atomic psnip_int32_t psnip_atomic_int32; -#else -typedef psnip_int64_t psnip_atomic_int64; -typedef psnip_int32_t psnip_atomic_int32; -#endif - -#define psnip_atomic_int64_load(object) \ - __atomic_load_n(object, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_store(object, desired) \ - __atomic_store_n(object, desired, 
__ATOMIC_SEQ_CST) -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - __atomic_compare_exchange_n(object, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_add(object, operand) \ - __atomic_add_fetch(object, operand, __ATOMIC_SEQ_CST) -#define psnip_atomic_int64_sub(object, operand) \ - __atomic_sub_fetch(object, operand, __ATOMIC_SEQ_CST) -#define psnip_atomic_fence() \ - __atomic_thread_fence(__ATOMIC_SEQ_CST) - -#define PSNIP_ATOMIC_IS_TG - -#elif PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_GCC_SYNC - -#include -typedef psnip_int64_t psnip_atomic_int64; -typedef psnip_int32_t psnip_atomic_int32; - -PSNIP_ATOMIC__FUNCTION -psnip_int64_t -psnip_atomic_int64_load(psnip_atomic_int64* object) { - __sync_synchronize(); - return (psnip_int64_t) *object; -} - -PSNIP_ATOMIC__FUNCTION -void -psnip_atomic_int64_store(psnip_atomic_int64* object, psnip_int64_t desired) { - *object = desired; - __sync_synchronize(); -} - -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - __sync_bool_compare_and_swap(object, *(expected), desired) -#define psnip_atomic_int64_add(object, operand) \ - __sync_fetch_and_add(object, operand) -#define psnip_atomic_int64_sub(object, operand) \ - __sync_fetch_and_sub(object, operand) - -PSNIP_ATOMIC__FUNCTION -psnip_int32_t -psnip_atomic_int32_load(psnip_atomic_int32* object) { - __sync_synchronize(); - return (psnip_int32_t) *object; -} - -PSNIP_ATOMIC__FUNCTION -void -psnip_atomic_int32_store(psnip_atomic_int32* object, psnip_int32_t desired) { - *object = desired; - __sync_synchronize(); -} - -#define psnip_atomic_int32_compare_exchange(object, expected, desired) \ - __sync_bool_compare_and_swap(object, *(expected), desired) -#define psnip_atomic_int32_add(object, operand) \ - __sync_fetch_and_add(object, operand) -#define psnip_atomic_int32_sub(object, operand) \ - __sync_fetch_and_sub(object, operand) - -#define psnip_atomic_fence() \ - __sync_synchronize() - -#elif 
PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_MS - -#include - -typedef long long volatile psnip_atomic_int64; -typedef long volatile psnip_atomic_int32; - -#define psnip_atomic_int32_load(object) \ - __pragma(warning(push)) \ - __pragma(warning(disable:28112)) \ - (*(object)) \ - __pragma(warning(pop)) -#define psnip_atomic_int32_store(object, desired) \ - InterlockedExchange(object, desired) -#define psnip_atomic_int32_compare_exchange(object, expected, desired) \ - InterlockedCompareExchange(object, desired, *(expected)) -#define psnip_atomic_int32_add(object, operand) \ - InterlockedExchangeAdd(object, operand) -#define psnip_atomic_int32_sub(object, operand) \ - InterlockedExchangeAdd(object, -(operand)) - -#define psnip_atomic_int64_load(object) \ - __pragma(warning(push)) \ - __pragma(warning(disable:28112)) \ - (*(object)) \ - __pragma(warning(pop)) -#define psnip_atomic_int64_store(object, desired) \ - InterlockedExchange64(object, desired) -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - InterlockedCompareExchange64(object, desired, *(expected)) -#define psnip_atomic_int64_add(object, operand) \ - InterlockedExchangeAdd64(object, operand) -#define psnip_atomic_int64_sub(object, operand) \ - InterlockedExchangeAdd64(object, -(operand)) - -#define psnip_atomic_fence() \ - MemoryBarrier() - -#elif PSNIP_ATOMIC_IMPL == PSNIP_ATOMIC_IMPL_OPENMP - -#include -typedef psnip_int64_t psnip_atomic_int64; -typedef psnip_int32_t psnip_atomic_int32; - -PSNIP_ATOMIC__FUNCTION -psnip_int64_t -psnip_atomic_int64_load(psnip_atomic_int64* object) { - psnip_int64_t ret; -#pragma omp critical(psnip_atomic) - ret = *object; - return ret; -} - -PSNIP_ATOMIC__FUNCTION -void -psnip_atomic_int64_store(psnip_atomic_int64* object, psnip_int64_t desired) { -#pragma omp critical(psnip_atomic) - *object = desired; -} - -PSNIP_ATOMIC__FUNCTION -int -psnip_atomic_int64_compare_exchange_(psnip_atomic_int64* object, psnip_int64_t* expected, psnip_int64_t desired) { - 
int ret; -#pragma omp critical(psnip_atomic) - ret = (*object == *expected) ? ((*object = desired), 1) : 0; - return ret; -} - -#define psnip_atomic_int64_compare_exchange(object, expected, desired) \ - psnip_atomic_int64_compare_exchange_(object, expected, desired) - -PSNIP_ATOMIC__FUNCTION -psnip_int64_t -psnip_atomic_int64_add(psnip_atomic_int64* object, psnip_int64_t operand) { - int ret; -#pragma omp critical(psnip_atomic) - *object = (ret = *object) + operand; - return ret; -} - -PSNIP_ATOMIC__FUNCTION -psnip_int64_t -psnip_atomic_int64_sub(psnip_atomic_int64* object, psnip_int64_t operand) { - int ret; -#pragma omp critical(psnip_atomic) - *object = (ret = *object) - operand; - return ret; -} -PSNIP_ATOMIC__FUNCTION -psnip_int32_t -psnip_atomic_int32_load(psnip_atomic_int32* object) { - psnip_int32_t ret; -#pragma omp critical(psnip_atomic) - ret = *object; - return ret; -} - -PSNIP_ATOMIC__FUNCTION -void -psnip_atomic_int32_store(psnip_atomic_int32* object, psnip_int32_t desired) { -#pragma omp critical(psnip_atomic) - *object = desired; -} - -PSNIP_ATOMIC__FUNCTION -int -psnip_atomic_int32_compare_exchange_(psnip_atomic_int32* object, psnip_int32_t* expected, psnip_int32_t desired) { - int ret = 1; -#pragma omp critical(psnip_atomic) - ret = (*object == *expected) ? 
((*object = desired), 1) : 0; - return ret; -} - -#define psnip_atomic_int32_compare_exchange(object, expected, desired) \ - psnip_atomic_int32_compare_exchange_(object, expected, desired) - -PSNIP_ATOMIC__FUNCTION -psnip_int32_t -psnip_atomic_int32_add(psnip_atomic_int32* object, psnip_int32_t operand) { - int ret; -#pragma omp critical(psnip_atomic) - *object = (ret = *object) + operand; - return ret; -} - -PSNIP_ATOMIC__FUNCTION -psnip_int32_t -psnip_atomic_int32_sub(psnip_atomic_int32* object, psnip_int32_t operand) { - int ret; -#pragma omp critical(psnip_atomic) - *object = (ret = *object) - operand; - return ret; -} - -PSNIP_ATOMIC__FUNCTION -void -psnip_atomic_fence() { -#pragma omp critical(psnip_atomic) - { } -} - - -#endif - -#if !defined(PSNIP_ATOMIC_VAR_INIT) -# define PSNIP_ATOMIC_VAR_INIT(value) (value) -#endif - -/* Most compilers have type-generic atomic implementations. */ -#if defined(PSNIP_ATOMIC_IS_TG) -#define psnip_atomic_int32_load(object) \ - psnip_atomic_int64_load(object) -#define psnip_atomic_int32_store(object, desired) \ - psnip_atomic_int64_store(object, desired) -#define psnip_atomic_int32_compare_exchange(object, expected, desired) \ - psnip_atomic_int64_compare_exchange(object, expected, desired) -#define psnip_atomic_int32_add(object, operand) \ - psnip_atomic_int64_add(object, operand) -#define psnip_atomic_int32_sub(object, operand) \ - psnip_atomic_int64_sub(object, operand) -#endif /* defined(PSNIP_ATOMIC_IS_TG) */ - -#endif /* !defined(PSNIP_ATOMIC_NOT_FOUND) */ - -#endif /* defined(PSNIP_ATOMIC_H) */ diff --git a/tools/avx-turbo/avx-turbo.cpp b/tools/avx-turbo/avx-turbo.cpp deleted file mode 100644 index ed7523e..0000000 --- a/tools/avx-turbo/avx-turbo.cpp +++ /dev/null @@ -1,890 +0,0 @@ -/* - * avx-turbo.cpp - */ - -#include "args.hxx" -#include "cpu.h" -#include "cpuid.hpp" -#include "msr-access.h" -#include "stats.hpp" -#include "tsc-support.hpp" -#include "table.hpp" -#include "util.hpp" - -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -#define MSR_IA32_MPERF 0x000000e7 -#define MSR_IA32_APERF 0x000000e8 - -using std::uint64_t; -using namespace std::chrono; - -using namespace Stats; - -typedef void (cal_f)(uint64_t iters); - -enum ISA { - BASE = 1 << 0, - AVX2 = 1 << 1, - AVX512F = 1 << 2, // note: does not imply VL, so xmm and ymm may not be available - AVX512VL = 1 << 3, // note: does not imply F, although i don't know any CPU with VL but not F - AVX512CD = 1 << 4, - AVX512BW = 1 << 5, -}; - -struct test_func { - // function pointer to the test function - cal_f* func; - const char* id; - const char* description; - ISA isa; -}; - - -#define FUNCS_X(x) \ - x(pause_only , "pause instruction" , BASE) \ - x(ucomis_clean , "scalar ucomis (w/ vzeroupper)" , AVX2) \ - x(ucomis_dirty , "scalar ucomis (no vzeroupper)" , AVX2) \ - \ - /* iadd */ \ - x(scalar_iadd , "Scalar integer adds" , BASE) \ - x(avx128_iadd , "128-bit integer serial adds" , AVX2 ) \ - x(avx256_iadd , "256-bit integer serial adds" , AVX2 ) \ - x(avx512_iadd , "512-bit integer serial adds" , AVX512F) \ - \ - x(avx128_iadd16 , "128-bit integer serial adds zmm16", AVX512VL) \ - x(avx256_iadd16 , "256-bit integer serial adds zmm16", AVX512VL) \ - x(avx512_iadd16 , "512-bit integer serial adds zmm16", AVX512F) \ - \ - /* iadd throughput */ \ - x(avx128_iadd_t , "128-bit integer parallel adds" , AVX2 ) \ - x(avx256_iadd_t , "256-bit integer parallel adds" , AVX2 ) \ - \ - /* zeroing xor */ \ - x(avx128_xor_zero , "128-bit zeroing xor" , AVX2 ) \ - x(avx256_xor_zero , "256-bit zeroing xor" , AVX2 ) \ - x(avx512_xor_zero , "512-bit zeroing xord" , AVX512F) \ - \ - /* reg-reg mov */ \ - x(avx128_mov_sparse , "128-bit reg-reg mov" , AVX2) \ - x(avx256_mov_sparse , "256-bit reg-reg mov" , AVX2 ) \ - x(avx512_mov_sparse , "512-bit reg-reg mov" , AVX512F) \ - \ - /* merge */ \ - 
x(avx128_merge_sparse , "128-bit reg-reg merge mov" , AVX512VL) \ - x(avx256_merge_sparse , "256-bit reg-reg merge mov" , AVX512VL) \ - x(avx512_merge_sparse , "512-bit reg-reg merge mov" , AVX512F) \ - \ - /* variable shift latency */ \ - x(avx128_vshift , "128-bit variable shift (vpsrlvd)", AVX2 ) \ - x(avx256_vshift , "256-bit variable shift (vpsrlvd)", AVX2 ) \ - x(avx512_vshift , "512-bit variable shift (vpsrlvd)", AVX512F) \ - /* variable shift throughput */ \ - x(avx128_vshift_t , "128-bit variable shift (vpsrlvd)", AVX2 ) \ - x(avx256_vshift_t , "256-bit variable shift (vpsrlvd)", AVX2 ) \ - x(avx512_vshift_t , "512-bit variable shift (vpsrlvd)", AVX512F) \ - \ - /* vplzcntd latency */ \ - x(avx128_vlzcnt , "128-bit lzcnt (vplzcntd)", AVX512CD | AVX512VL) \ - x(avx256_vlzcnt , "256-bit lzcnt (vplzcntd)", AVX512CD | AVX512VL) \ - x(avx512_vlzcnt , "512-bit lzcnt (vplzcntd)", AVX512CD) \ - /* vplzcntd throughput */ \ - x(avx128_vlzcnt_t , "128-bit lzcnt (vplzcntd)", AVX512CD | AVX512VL) \ - x(avx256_vlzcnt_t , "256-bit lzcnt (vplzcntd)", AVX512CD | AVX512VL) \ - x(avx512_vlzcnt_t , "512-bit lzcnt (vplzcntd)", AVX512CD) \ - \ - x(avx128_imul , "128-bit integer muls (vpmuldq)" , AVX2 ) \ - x(avx256_imul , "256-bit integer muls (vpmuldq)" , AVX2 ) \ - x(avx512_imul , "512-bit integer muls (vpmuldq)" , AVX512F) \ - \ - /* fma */ \ - x(avx128_fma_sparse , "128-bit 64-bit sparse FMAs" , AVX2 ) \ - x(avx256_fma_sparse , "256-bit 64-bit sparse FMAs" , AVX2 ) \ - x(avx512_fma_sparse , "512-bit 64-bit sparse FMAs" , AVX512F) \ - x(avx128_fma , "128-bit serial DP FMAs" , AVX2 ) \ - x(avx256_fma , "256-bit serial DP FMAs" , AVX2 ) \ - x(avx512_fma , "512-bit serial DP FMAs" , AVX512F) \ - x(avx128_fma_t , "128-bit parallel DP FMAs" , AVX2 ) \ - x(avx256_fma_t , "256-bit parallel DP FMAs" , AVX2 ) \ - x(avx512_fma_t , "512-bit parallel DP FMAs" , AVX512F) \ - \ - x(avx512_vpermw , "512-bit serial WORD permute" , AVX512BW) \ - x(avx512_vpermw_t , "512-bit parallel WORD 
permute" , AVX512BW) \ - x(avx512_vpermd , "512-bit serial DWORD permute" , AVX512F) \ - x(avx512_vpermd_t , "512-bit parallel DWORD permute" , AVX512F) \ - - -#define DECLARE(f,...) cal_f f; - -extern "C" { -// functions declared in asm-methods.asm -FUNCS_X(DECLARE); - - -// misc helpers -void zeroupper_asm(); - -static bool zeroupper_allowed; - -void zeroupper() { - if (zeroupper_allowed) zeroupper_asm(); -} - -} - -#define MAKE_STRUCT(f, d, i) { f, #f, d, (ISA)(i) }, -const test_func ALL_FUNCS[] = { -FUNCS_X(MAKE_STRUCT) -}; - -void pin_to_cpu(int cpu) { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset) == -1) { - error(EXIT_FAILURE, errno, "could not pin to CPU %d", cpu); - } -} - -/** args */ -args::ArgumentParser parser{"avx-turbo: Determine AVX2 and AVX-512 downclocking behavior"}; -args::HelpFlag help{parser, "help", "Display this help menu", {'h', "help"}}; -args::Flag arg_force_tsc_cal{parser, "force-tsc-calibrate", - "Force manual TSC calibration loop, even if cpuid TSC Hz is available", {"force-tsc-calibrate"}}; -args::Flag arg_no_pin{parser, "no-pin", - "Don't try to pin threads to CPU - gives worse results but works around affinity issues on TravisCI", {"no-pin"}}; -args::Flag arg_verbose{parser, "verbose", "Output more info", {"verbose"}}; -args::Flag arg_nobarrier{parser, "no-barrier", "Don't sync up threads before each test (debugging only)", {"no-barrier"}}; -args::Flag arg_list{parser, "list", "List the available tests and their descriptions", {"list"}}; -args::Flag arg_hyperthreads{parser, "allow-hyperthreads", "By default we try to filter down the available cpus to include only physical cores, but " - "with this option we'll use all logical cores meaning you'll run two tests on cores with hyperthreading", {"allow-hyperthreads"}}; -args::Flag arg_dirty{parser, "dirty-upper", "AVX-512 only: the 512-bit zmm15 register is dirtied befor each test", - {"dirty-upper"}}; -args::Flag 
arg_dirty16{parser, "dirty-upper", "AVX-512 only: the 512-bit zmm16 register is dirtied befor each test", - {"dirty-upper16"}}; -args::ValueFlag arg_focus{parser, "TEST-ID", "Run only the specified test (by ID)", {"test"}}; -args::ValueFlag arg_spec{parser, "SPEC", "Run a specific type of test specified by a specification string", {"spec"}}; -args::ValueFlag arg_iters{parser, "ITERS", "Run the test loop ITERS times (default 100000)", {"iters"}, 100000}; -args::ValueFlag arg_min_threads{parser, "MIN", "The minimum number of threads to use", {"min-threads"}, 1}; -args::ValueFlag arg_max_threads{parser, "MAX", "The maximum number of threads to use", {"max-threads"}}; -args::ValueFlag arg_num_cpus{parser, "CPUS", "Override number of available CPUs", {"num-cpus"}}; -args::ValueFlag arg_warm_ms{parser, "MILLISECONDS", "Warmup milliseconds for each thread after pinning (default 100)", {"warmup-ms"}, 100}; -args::ValueFlag arg_cpuids{parser, "CPUIDS", "Pin threads to comma-separated list of CPU IDs (default sequential ids)", {"cpuids"}}; - -bool verbose; - -template -struct StdClock { - using now_t = decltype(CHRONO_CLOCK::now()); - using delta_t = typename CHRONO_CLOCK::duration; - - static now_t now() { - return CHRONO_CLOCK::now(); - } - - /* accept the result of subtraction of durations and convert to nanos */ - static uint64_t to_nanos(typename CHRONO_CLOCK::duration d) { - return duration_cast(d).count(); - } -}; - -struct RdtscClock { - using now_t = uint64_t; - using delta_t = uint64_t; - - static now_t now() { - _mm_lfence(); - now_t ret = rdtsc(); - _mm_lfence(); - return ret; - } - - /* accept the result of subtraction of durations and convert to nanos */ - static uint64_t to_nanos(now_t diff) { - static double tsc_to_nanos = 1000000000.0 / tsc_freq(); - return diff * tsc_to_nanos; - } - - static uint64_t tsc_freq() { - static uint64_t freq = get_tsc_freq(arg_force_tsc_cal); - return freq; - } - -}; - -/** - * We pass an outer_clock to run_test which times 
outside the iteration of the innermost loop (i.e., - * it times around the loop that runs TRIES times), start should reset the state unless you want to - * time warmup iterations. - */ -struct outer_timer { - virtual void start() = 0; - virtual void stop() = 0; - virtual ~outer_timer() {} -}; - -struct dummy_outer : outer_timer { - static dummy_outer dummy; - virtual void start() override {}; - virtual void stop() override {}; -}; -dummy_outer dummy_outer::dummy{}; - -/** lets you determine the actual frequency over any interval using the free-running APERF and MPERF counters */ -struct aperf_ghz : outer_timer { - uint64_t mperf_value, aperf_value, tsc_value; - enum { - STARTED, STOPPED - } state; - - aperf_ghz() : mperf_value(0), aperf_value(0), tsc_value(0), state(STOPPED) {} - - static uint64_t mperf() { - return read(MSR_IA32_MPERF); - } - - static uint64_t aperf() { - return read(MSR_IA32_APERF); - } - - static uint64_t read(uint32_t msr) { - uint64_t value = -1; - int res = read_msr_cur_cpu(msr, &value); - assert(res == 0); - return value; - } - - /** - * Return true iff APERF and MPERF MSR reads appear to work - */ - static bool is_supported() { - uint64_t dummy; - return read_msr(1, MSR_IA32_MPERF, &dummy) == 0 - && read_msr(1, MSR_IA32_APERF, &dummy) == 0; - } - - virtual void start() override { - assert(state == STOPPED); - state = STARTED; - mperf_value = mperf(); - aperf_value = aperf(); - tsc_value = rdtsc(); -// printf("started timer m: %lu\n", mperf_value); -// printf("started timer a: %lu\n", aperf_value); - }; - - virtual void stop() override { - assert(state == STARTED); - mperf_value = mperf() - mperf_value; - aperf_value = aperf() - aperf_value; - tsc_value = rdtsc() - tsc_value; - state = STOPPED; -// printf("stopped timer m: %lu (delta)\n", mperf_value); -// printf("stopped timer a: %lu (delta)\n", aperf_value); - }; - - /** aperf / mperf ratio */ - double am_ratio() { - assert(state == STOPPED); - assert(mperf_value != 0 && aperf_value != 0); 
-// printf("timer ratio m: %lu (delta)\n", mperf_value); -// printf("timer ratio a: %lu (delta)\n", aperf_value); - return (double)aperf_value / mperf_value; - } - - /** mperf / tsc ratio, i.e., the % of the time the core was unhalted */ - double mt_ratio() { - assert(state == STOPPED); - assert(mperf_value != 0 && tsc_value != 0); -// printf("timer ratio m: %lu (delta)\n", mperf_value); -// printf("timer ratio a: %lu (delta)\n", aperf_value); - return (double)mperf_value / tsc_value; - } - - -}; - -/* - * The result of the run_test method, with only the stuff - * that can be calculated from within that method. - */ -struct inner_result { - /* calculated Mops value */ - double mops; - uint64_t ostart_ts, oend_ts; - uint64_t istart_ts, iend_ts; // start and end timestamps for the "critical" benchmark portion -}; - -/* - * Calculate the frequency of the CPU based on timing a tight loop that we expect to - * take one iteration per cycle. - * - * ITERS is the base number of iterations to use: the calibration routine is actually - * run twice, once with ITERS iterations and once with 2*ITERS, and a delta is used to - * remove measurement overhead. 
- */ -struct hot_barrier { - size_t break_count; - std::atomic current; - hot_barrier(size_t count) : break_count(count), current{0} {} - - /* increment the arrived count of the barrier (do this once per thread generally) */ - void increment() { - current++; - } - - /* return true if all the threads have arrived, never blocks */ - bool is_broken() { - return current.load() == break_count; - } - - /* increment and hot spin on the waiter count until it hits the break point, returns the spin count in case you care */ - long wait() { - increment(); - long count = 0; - while (!is_broken()) { - count++; - } - return count; - } -}; - -// dirties zmm15 upper bits -extern "C" void dirty_it(); -// dirties zmm15 upper bits -extern "C" void dirty_it16(); - -template -inner_result run_test(cal_f* func, size_t iters, outer_timer& outer, hot_barrier *barrier) { - assert(iters % 100 == 0); - - std::array results; - - inner_result result; - - if (arg_dirty) { - dirty_it(); - } - - if (arg_dirty16) { - dirty_it16(); - } - - result.ostart_ts = RdtscClock::now(); - for (size_t w = 0; w < WARMUP + 1; w++) { - result.istart_ts = RdtscClock::now(); - outer.start(); - for (size_t r = 0; r < TRIES; r++) { - auto t0 = CLOCK::now(); - func(iters); - auto t1 = CLOCK::now(); - func(iters * 2); - auto t2 = CLOCK::now(); - results[r] = (t2 - t1) - (t1 - t0); - } - outer.stop(); - result.iend_ts = RdtscClock::now(); - } - - for (barrier->increment(); !barrier->is_broken();) { - func(iters); - } - result.oend_ts = RdtscClock::now(); - - std::array nanos = {}; - std::transform(results.begin(), results.end(), nanos.begin(), CLOCK::to_nanos); - DescriptiveStats stats = get_stats(nanos.begin(), nanos.end()); - - result.mops = ((double)iters / stats.getMedian()); - return result; -} - -ISA get_isas() { - int ret = BASE; - ret |= psnip_cpu_feature_check(PSNIP_CPU_FEATURE_X86_AVX2 ) ? AVX2 : 0; - ret |= psnip_cpu_feature_check(PSNIP_CPU_FEATURE_X86_AVX512F ) ? 
AVX512F : 0; - ret |= psnip_cpu_feature_check(PSNIP_CPU_FEATURE_X86_AVX512VL) ? AVX512VL : 0; - ret |= psnip_cpu_feature_check(PSNIP_CPU_FEATURE_X86_AVX512CD) ? AVX512CD : 0; - ret |= psnip_cpu_feature_check(PSNIP_CPU_FEATURE_X86_AVX512BW) ? AVX512BW : 0; - return (ISA)ret; -} - -bool should_run(const test_func& t, ISA isas_supported) { - return (t.isa & isas_supported) == t.isa; -} - -/* - * A test_spec contains the information needed to run one test. It is composed of - * a list of test_funcs, which should be run in parallel on separate threads. - */ -struct test_spec { - std::string name; - std::string description; - std::vector thread_funcs; - - test_spec(std::string name, std::string description) : name{name}, description{description} {} - - /** how many threads/funcs in this test */ - size_t count() const { return thread_funcs.size(); } - - std::string to_string() const { - std::string ret; - for (auto& t : thread_funcs) { - ret += t.id; - ret += ','; - } - return ret; - } -}; - - -/* find the test that exactly matches the given ID or return nullptr if not found */ -const test_func *find_one_test(const std::string& id) { - for (const auto& t : ALL_FUNCS) { - if (id == t.id) { - return &t; - } - } - return nullptr; -} - -/** - * If the user didn't specify any particular test spec, just create for every thread count - * value T and runnable func, a spec with T copies of func. 
- */ -std::vector make_default_tests(ISA isas_supported, std::vector cpus) { - std::vector ret; - - size_t maxcpus; - if (arg_max_threads) { - auto max = arg_max_threads.Get(); - if (max > (int)cpus.size()) { - printf("WARNING: can't run the requested number of threads (%d) because there are only %d available logical CPUs.\n", - max, (int)cpus.size()); - maxcpus = (int)cpus.size(); - } else { - maxcpus = max; - } - } else { - maxcpus = cpus.size(); - } - - printf("Will test up to %lu CPUs\n", maxcpus); - - auto try_add = [&ret](const test_func& t, size_t thread_count) { - test_spec spec(t.id, t.description); - spec.thread_funcs.resize(thread_count, t); // fill with thread_count copies of t - ret.push_back(std::move(spec)); - }; - - std::vector funcs; // the selected test functions - if (arg_focus) { - for (auto& focus : split(arg_focus.Get(), ",")) { - auto t = find_one_test(focus); - if (!t) { - printf("WARNING: Can't find specified test: %s\n", focus.c_str()); - } else { - funcs.push_back(*t); - } - } - } else { - funcs.insert(funcs.begin(), std::begin(ALL_FUNCS), std::end(ALL_FUNCS)); - } - - for (size_t thread_count = arg_min_threads.Get(); thread_count <= maxcpus; thread_count++) { - for (const auto& t : funcs) { - if (should_run(t, isas_supported)) { - try_add(t, thread_count); - } - } - } - - return ret; -} - - -std::vector make_from_spec(ISA, std::vector cpus) { - std::string str = arg_spec.Get(); - if (verbose) printf("Making tests from spec string: %s\n", str.c_str()); - - test_spec spec{str, ""}; - for (auto& elem : split(str,",")) { - if (verbose) printf("Elem: %s\n", elem.c_str()); - std::vector halves = split(elem,"/"); - assert(halves.size() > 0); - if (halves.size() > 2) { - throw std::runtime_error(std::string("bad spec syntax in element: '" + elem + "'")); - } - int count = (halves.size() == 1 ? 
1 : std::atoi(halves[1].c_str())); - const test_func* test = find_one_test(halves[0]); - if (!test) { - throw std::runtime_error("couldn't find test: '" + halves[0] + "'"); - } - - spec.thread_funcs.insert(spec.thread_funcs.end(), count, *test); - } - - if (spec.count() > cpus.size()) { - printf("ERROR: this spec requires %d CPUs but only %d are available.\n", (int)spec.count(), (int)cpus.size()); - exit(EXIT_FAILURE); - } - - return {spec}; -} - -std::vector filter_tests(ISA isas_supported, std::vector cpus) { - if (!arg_spec) { - return make_default_tests(isas_supported, cpus); - } else { - return make_from_spec(isas_supported, cpus); - } -} - -struct result { - static constexpr double nan = std::numeric_limits::quiet_NaN(); - inner_result inner; - - uint64_t start_ts; // start timestamp - uint64_t end_ts; // end timestamp - - /* optional stuff associated with outer_timer */ - double aperf_am = nan; - double aperf_mt = nan; -}; - -struct result_holder { - const test_spec* spec; - std::vector results; // will have spec.count() elements - - result_holder(const test_spec* spec) : spec(spec) {} - - /** calculate the overlap ratio based on the start/end timestamps */ - double get_overlap1() const { - std::vector> ranges = transformv(results, [](const result& r){ return std::make_pair(r.start_ts, r.end_ts);} ); - return conc_ratio(ranges.begin(), ranges.end()); - } - - /** calculate the overlap ratio based on the start/end timestamps */ - double get_overlap2() const { - std::vector> ranges = transformv(results, [](const result& r){ return std::make_pair(r.inner.istart_ts, r.inner.iend_ts);} ); - return conc_ratio(ranges.begin(), ranges.end()); - } - - /** calculate the inner overlap ratio based on the start/end timestamps */ - double get_overlap3() const { - auto orange = transformv(results, [](const result& r){ return std::make_pair(r.inner.ostart_ts, r.inner.oend_ts);} ); - auto irange = transformv(results, [](const result& r){ return 
std::make_pair(r.inner.istart_ts, r.inner.iend_ts);} ); - return nconc_ratio(orange.begin(), orange.end(), irange.begin(), irange.end()); - } -}; - -struct warmup { - uint64_t millis; - warmup(uint64_t millis) : millis{millis} {} - - long warm() { - int64_t start = (int64_t)RdtscClock::now(); - long iters = 0; - while (RdtscClock::to_nanos(RdtscClock::now() - start) < 1000000u * millis) { - iters++; - } - return iters; - } -}; - -struct test_thread { - size_t id; - size_t cpu_id; - hot_barrier* start_barrier; - hot_barrier* stop_barrier; - - /* output */ - result res; - - /* input */ - const test_func* test; - size_t iters; - bool use_aperf; - - std::thread thread; - - test_thread(size_t id, size_t cpu_id, hot_barrier& start_barrier, hot_barrier& stop_barrier, const test_func *test, size_t iters, bool use_aperf) : - id{id}, cpu_id{cpu_id}, start_barrier{&start_barrier}, stop_barrier{&stop_barrier}, test{test}, - iters{iters}, use_aperf{use_aperf}, thread{std::ref(*this)} - { - // if (verbose) printf("Constructed test in thread %lu, this = %p\n", id, this); - } - - test_thread(const test_thread&) = delete; - test_thread(test_thread&&) = delete; - void operator=(const test_thread&) = delete; - - void operator()() { - // if (verbose) printf("Running test in thread %lu, this = %p\n", id, this); - if (!arg_no_pin) { - pin_to_cpu(cpu_id); - } - aperf_ghz aperf_timer; - outer_timer& outer = use_aperf ? static_cast(aperf_timer) : dummy_outer::dummy; - warmup w{arg_warm_ms.Get()}; - long warms = w.warm(); - if (verbose) printf("[%2lu] Warmup iters %lu\n", id, warms); - if (!arg_nobarrier) { - long count = start_barrier->wait(); - if (verbose) printf("[%2lu] Thread loop count: %ld\n", id, count); - } - res.start_ts = RdtscClock::now(); - res.inner = run_test(test->func, iters, outer, stop_barrier); - res.end_ts = RdtscClock::now(); - res.aperf_am = use_aperf ? aperf_timer.am_ratio() : 0.0; - res.aperf_mt = use_aperf ? 
aperf_timer.mt_ratio() : 0.0; - } -}; - -template -std::string result_string(const std::vector& results, const char* format, E e) { - std::string s; - for (const auto& result : results) { - if (!s.empty()) s += ", "; - s += table::string_format(format, e(result)); - } - return s; -} - -void report_results(const std::vector& results_list, bool use_aperf) { - // report - table::Table table; - table.setColColumnSeparator(" | "); - - auto& header = table.newRow(); - - using table::ColInfo; - - auto adder = [&header, &table](const char* s, ColInfo::Justification just = ColInfo::LEFT) { - header.add(s); - table.colInfo(header.size() - 1).justify = just; - }; - - adder("Cores"); - adder("ID"); - adder("Description"); - // adder("OVRLP1", ColInfo::RIGHT); - // adder("OVRLP2", ColInfo::RIGHT); - adder("OVRLP3", ColInfo::RIGHT); - adder("Mops", ColInfo::RIGHT); - - if (use_aperf) { - adder("A/M-ratio", ColInfo::RIGHT); - adder("A/M-MHz", ColInfo::RIGHT); - adder("M/tsc-ratio", ColInfo::RIGHT); - } - - for (const result_holder& holder : results_list) { - auto spec = holder.spec; - auto &row = table.newRow() - .add(spec->count()) - .add(spec->name) - .add(spec->description) - // .addf("%5.3f", holder.get_overlap1()) - // .addf("%5.3f", holder.get_overlap2()) - .addf("%5.3f", holder.get_overlap3()); - - auto& results = holder.results; - row.add(result_string(results, "%5.0f", [](const result& r){ return r.inner.mops * 1000; })); - if (use_aperf) { - row.add(result_string(results, "%4.2f", [](const result& r){ return r.aperf_am; })); - row.add(result_string(results, "%.0f", [](const result& r){ return r.aperf_am / 1000000.0 * RdtscClock::tsc_freq(); })); - row.add(result_string(results, "%4.2f", [](const result& r){ return r.aperf_mt; })); - } - } - - printf("%s\n", table.str().c_str()); -} - -void list_tests() { - table::Table table; - table.newRow().add("ID").add("Description"); - for (auto& t : ALL_FUNCS) { - table.newRow().add(t.id).add(t.description); - } - 
printf("Available tests:\n\n%s\n", table.str().c_str()); -} - -std::vector get_cpus() { - if (arg_num_cpus) { - auto cpu_num = arg_num_cpus.Get(); - std::vector ret; - for (int cpu = 0; cpu < cpu_num; ++cpu) { - ret.push_back(cpu); - } - return ret; - } - cpu_set_t cpu_set; - if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set)) { - err(EXIT_FAILURE, "failed while getting cpu affinity"); - } - std::vector ret; - for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (CPU_ISSET(cpu, &cpu_set)) { - ret.push_back(cpu); - } - } - return ret; -} - -/* try to filter the CPU list to return only physical CPUs */ -std::vector filter_cpus(std::vector cpus) { - int shift = get_smt_shift(); - if (shift == -1) { - printf("Can't use cpuid leaf 0xb to filter out hyperthreads, CPU too old or AMD\n"); - return cpus; - } - cpu_set_t original_set; - if (sched_getaffinity(0, sizeof(original_set), &original_set)) { - err(EXIT_FAILURE, "failed while getting cpu affinity"); - } - std::vector filtered_cpus; - std::set coreid_set; - for (int cpu : cpus) { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset)) { - err(EXIT_FAILURE, "failed to sched_setaffinity in filter_cpus"); - } - cpuid_result leafb = cpuid(0xb); - uint32_t apicid = leafb.edx, coreid = apicid >> shift; - if (verbose) printf("cpu %d has x2apic ID %u, coreid %u\n", cpu, apicid, coreid); - if (coreid_set.insert(coreid).second) { - filtered_cpus.push_back(cpu); - } - } - // restore original affinity - sched_setaffinity(0, sizeof(cpu_set_t), &original_set); - return filtered_cpus; -} - -int main(int argc, char** argv) { - - try { - parser.ParseCLI(argc, argv); - if (arg_iters.Get() % 100 != 0) { - printf("ITERS must be a multiple of 100\n"); - exit(EXIT_FAILURE); - } - } catch (args::Help& help) { - printf("%s\n", parser.Help().c_str()); - exit(EXIT_SUCCESS); - } catch (const args::ParseError& e) { - printf("ERROR while parsing arguments: %s\n", e.what()); - 
printf("\nUsage:\n%s\n", parser.Help().c_str()); - exit(EXIT_FAILURE); - } - - if (arg_list) { - list_tests(); - exit(EXIT_SUCCESS); - } - - verbose = arg_verbose; - bool is_root = (geteuid() == 0); - bool use_aperf = aperf_ghz::is_supported(); - printf("CPUID highest leaf : [%2xh]\n", cpuid_highest_leaf()); - printf("Running as root : [%s]\n", is_root ? "YES" : "NO "); - printf("MSR reads supported : [%s]\n", use_aperf ? "YES" : "NO "); - printf("CPU pinning enabled : [%s]\n", !arg_no_pin ? "YES" : "NO "); - - ISA isas_supported = get_isas(); - zeroupper_allowed = isas_supported & AVX2; - printf("CPU supports zeroupper: [%s]\n", zeroupper_allowed ? "YES" : "NO "); - printf("CPU supports AVX2 : [%s]\n", isas_supported & AVX2 ? "YES" : "NO "); - printf("CPU supports AVX-512F : [%s]\n", isas_supported & AVX512F ? "YES" : "NO "); - printf("CPU supports AVX-512VL: [%s]\n", isas_supported & AVX512VL ? "YES" : "NO "); - printf("CPU supports AVX-512BW: [%s]\n", isas_supported & AVX512BW ? "YES" : "NO "); - printf("CPU supports AVX-512CD: [%s]\n", isas_supported & AVX512CD ? 
"YES" : "NO "); - printf("tsc_freq = %.1f MHz (%s)\n", RdtscClock::tsc_freq() / 1000000.0, get_tsc_cal_info(arg_force_tsc_cal)); - std::vector cpus = get_cpus(); - printf("CPU brand string: %s\n", get_brand_string().c_str()); - printf("%lu available CPUs: [%s]\n", cpus.size(), join(cpus, ", ").c_str()); - if (!arg_hyperthreads) { - cpus = filter_cpus(cpus); - printf("%lu physical cores: [%s]\n", cpus.size(), join(cpus, ", ").c_str()); - } - - if (arg_dirty && !(isas_supported & AVX512VL)) { - printf("ERROR: --dirty-upper only supported on AVX-512 hardware\n"); - exit(EXIT_FAILURE); - } - - auto iters = arg_iters.Get(); - zeroupper(); - auto specs = filter_tests(isas_supported, cpus); - - // parse comma separate list of cpu_ids into an array - std::vector cpu_ids; - if (arg_cpuids) { - for (auto& id : split(arg_cpuids.Get(), ",")) { - cpu_ids.push_back(std::atoi(id.c_str())); - } - } else { - for (int i = 0; i < (int)cpus.size(); i++) { - cpu_ids.push_back(i); - } - } - - size_t last_thread_count = -1u; - std::vector results_list; - for (auto& spec : specs) { - // if we changed the number of threads, spit out the accumulated output - if (last_thread_count != -1u && last_thread_count != spec.count()) { - // time to print results - report_results(results_list, use_aperf); - results_list.clear(); - } - last_thread_count = spec.count(); - - assert(!spec.thread_funcs.empty()); - if (verbose) printf("Running test spec: %s\n", spec.to_string().c_str()); - - // run - std::deque threads; - hot_barrier start{spec.count()}, stop{spec.count()}; - for (auto& test : spec.thread_funcs) { - threads.emplace_back(threads.size(), cpu_ids[threads.size()], start, stop, &test, iters, use_aperf); - } - - results_list.emplace_back(&spec); - for (auto& t : threads) { - t.thread.join(); - results_list.back().results.push_back(t.res); - } - } - - report_results(results_list, use_aperf); - - return EXIT_SUCCESS; -} - - - - diff --git a/tools/avx-turbo/check-uarch.sh 
b/tools/avx-turbo/check-uarch.sh deleted file mode 100755 index 6c6246c..0000000 --- a/tools/avx-turbo/check-uarch.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -e - -# runs Intel SDE to check that ./avx-turbo works for every arch -all_arch=( \ - p4p \ - mrm \ - pnr \ - nhm \ - wsm \ - snb \ - ivb \ - hsw \ - bdw \ - slt \ - slm \ - glm \ - tnt \ - skl \ - clx \ - skx \ - cnl \ - icl \ - icx \ - knl \ - knm \ - future \ -) - -for arch in "${all_arch[@]}"; do - echo "Testing arch=$arch with SDE" - sde64 -${arch} -- ./avx-turbo --max-threads=1 -done \ No newline at end of file diff --git a/tools/avx-turbo/config.mk b/tools/avx-turbo/config.mk deleted file mode 100644 index dbd0409..0000000 --- a/tools/avx-turbo/config.mk +++ /dev/null @@ -1,16 +0,0 @@ --include local.mk - -# set DEBUG to 1 to enable various debugging checks -DEBUG ?= 0 - -# The assembler to use. Defaults to nasm, but can also be set to yasm which has better -# debug info handling. -ASM ?= ./nasm-2.13.03/nasm - -ifeq ($(DEBUG),1) -O_LEVEL ?= -O0 -NASM_DEBUG ?= 1 -else -O_LEVEL ?= -O2 -NASM_DEBUG ?= 0 -endif diff --git a/tools/avx-turbo/cpu.c b/tools/avx-turbo/cpu.c deleted file mode 100644 index a42940e..0000000 --- a/tools/avx-turbo/cpu.c +++ /dev/null @@ -1,165 +0,0 @@ -/* CPU Information (v1) - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -#include "cpu.h" - -#include "once.h" - -#include - -#if defined(_WIN32) -# include -# define PSNIP_CPU__IMPL_WIN32 -#elif defined(unix) || defined(__unix__) || defined(__unix) -# include -# if defined(_SC_NPROCESSORS_ONLN) || defined(_SC_NPROC_ONLN) -# define PSNIP_CPU__IMPL_SYSCONF -# else -# include -# endif -#endif - -#if defined(PSNIP_CPU_ARCH_X86) || defined(PSNIP_CPU_ARCH_X86_64) -# if defined(_MSC_VER) -static void psnip_cpu_getid(int func, int* data) { - __cpuid(data, func); -} -# else -static void psnip_cpu_getid(int func, int* data) { - __asm__ ("cpuid" - : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) - : "0" (func), "2" (0)); -} -# endif -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM64) -# if (defined(__GNUC__) && ((__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ >= 16))) -# define PSNIP_CPU__IMPL_GETAUXVAL -# include -# endif -#endif - -static psnip_once psnip_cpu_once = PSNIP_ONCE_INIT; - -#if defined(PSNIP_CPU_ARCH_X86) || defined(PSNIP_CPU_ARCH_X86_64) -static unsigned int psnip_cpuinfo[8 * 4] = { 0, }; -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM_64) -static unsigned long psnip_cpuinfo[2] = { 0, }; -#endif - -static void psnip_cpu_init(void) { -#if defined(PSNIP_CPU_ARCH_X86) || defined(PSNIP_CPU_ARCH_X86_64) - int i; - for (i = 0 ; i < 8 ; i++) { - psnip_cpu_getid(i, (int*) &(psnip_cpuinfo[i * 4])); - } -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM_64) - psnip_cpuinfo[0] = getauxval (AT_HWCAP); - psnip_cpuinfo[1] = getauxval (AT_HWCAP2); -#endif -} - -int -psnip_cpu_feature_check (enum PSnipCPUFeature feature) { -#if defined(PSNIP_CPU_ARCH_X86) || defined(PSNIP_CPU_ARCH_X86_64) - unsigned int i, r, b; -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM_64) - unsigned long b, i; -#endif - -#if defined(PSNIP_CPU_ARCH_X86) || 
defined(PSNIP_CPU_ARCH_X86_64) - if ((feature & PSNIP_CPU_FEATURE_CPU_MASK) != PSNIP_CPU_FEATURE_X86) - return 0; -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM_64) - if ((feature & PSNIP_CPU_FEATURE_CPU_MASK) != PSNIP_CPU_FEATURE_ARM) - return 0; -#else - return 0; -#endif - - feature &= (enum PSnipCPUFeature) ~PSNIP_CPU_FEATURE_CPU_MASK; -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable:4152) -#endif - psnip_once_call (&psnip_cpu_once, psnip_cpu_init); -#if defined(_MSC_VER) -#pragma warning(pop) -#endif - -#if defined(PSNIP_CPU_ARCH_X86) || defined(PSNIP_CPU_ARCH_X86_64) - i = (feature >> 16) & 0xff; - r = (feature >> 8) & 0xff; - b = (feature ) & 0xff; - - if (i > 7 || r > 3 || b > 31) - return 0; - - return (psnip_cpuinfo[(i * 4) + r] >> b) & 1; -#elif defined(PSNIP_CPU_ARCH_ARM) || defined(PSNIP_CPU_ARCH_ARM_64) - b = 1 << ((feature & 0xff) - 1); - i = psnip_cpuinfo[(feature >> 0x08) & 0xff]; - return (psnip_cpuinfo[(feature >> 0x08) & 0xff] & b) == b; -#endif -} - -int -psnip_cpu_feature_check_many (enum PSnipCPUFeature* feature) { - int n; - - for (n = 0 ; feature[n] != PSNIP_CPU_FEATURE_NONE ; n++) - if (!psnip_cpu_feature_check(feature[n])) - return 0; - - return 1; -} - -int -psnip_cpu_count (void) { - static int count = 0; - int c; - -#if defined(_WIN32) - DWORD_PTR lpProcessAffinityMask; - DWORD_PTR lpSystemAffinityMask; - int i; -#elif defined(PSNIP_CPU__IMPL_SYSCONF) && defined(HW_NCPU) - int mib[2]; - size_t len; -#endif - - if (count != 0) - return count; - -#if defined(_WIN32) - if (!GetProcessAffinityMask(GetCurrentProcess(), &lpProcessAffinityMask, &lpSystemAffinityMask)) { - c = -1; - } else { - c = 0; - for (i = 0 ; lpProcessAffinityMask != 0 ; lpProcessAffinityMask >>= 1) - c += lpProcessAffinityMask & 1; - } -#elif defined(_SC_NPROCESSORS_ONLN) - c = sysconf (_SC_NPROCESSORS_ONLN); -#elif defined(_SC_NPROC_ONLN) - c = sysconf (_SC_NPROC_ONLN); -#elif defined(_hpux) - c = mpctl(MPC_GETNUMSPUS, NULL, 
NULL); -#elif defined(HW_NCPU) - c = 0; - mib[0] = CTL_HW; - mib[1] = HW_NCPU; - len = sizeof(c); - sysctl (mib, 2, &c, &len, NULL, 0); -#endif - - count = (c > 0) ? c : -1; - - return count; -} diff --git a/tools/avx-turbo/cpu.h b/tools/avx-turbo/cpu.h deleted file mode 100644 index 160109b..0000000 --- a/tools/avx-turbo/cpu.h +++ /dev/null @@ -1,190 +0,0 @@ -/* CPU Information (v1) - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -#if !defined(PSNIP_CPU__H) -#define PSNIP_CPU__H - -#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -# define PSNIP_CPU_ARCH_X86_64 -#elif defined(__i686__) || defined(__i586__) || defined(__i486__) || defined(__i386__) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__THW_INTEL__) -# define PSNIP_CPU_ARCH_X86 -#elif defined(__arm__) || defined(_M_ARM) -# define PSNIP_CPU_ARCH_ARM -#elif defined(__aarch64__) -# define PSNIP_CPU_ARCH_ARM64 -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -enum PSnipCPUFeature { - PSNIP_CPU_FEATURE_NONE = 0, - - PSNIP_CPU_FEATURE_CPU_MASK = 0x1f000000, - PSNIP_CPU_FEATURE_X86 = 0x01000000, - PSNIP_CPU_FEATURE_ARM = 0x04000000, - - /* x86 CPU features are constructed as: - * - * (PSNIP_CPU_FEATURE_X86 | (eax << 16) | (ret_reg << 8) | (bit_position) - * - * For example, SSE3 is determined by the fist bit in the ECX - * register for a CPUID call with EAX=1, so we get: - * - * PSNIP_CPU_FEATURE_X86 | (1 << 16) | (2 << 8) | (0) = 0x01010200 - * - * We should have information for inputs of EAX=0-7 w/ ECX=0. 
- */ - PSNIP_CPU_FEATURE_X86_FPU = 0x01010300, - PSNIP_CPU_FEATURE_X86_VME = 0x01010301, - PSNIP_CPU_FEATURE_X86_DE = 0x01010302, - PSNIP_CPU_FEATURE_X86_PSE = 0x01010303, - PSNIP_CPU_FEATURE_X86_TSC = 0x01010304, - PSNIP_CPU_FEATURE_X86_MSR = 0x01010305, - PSNIP_CPU_FEATURE_X86_PAE = 0x01010306, - PSNIP_CPU_FEATURE_X86_MCE = 0x01010307, - PSNIP_CPU_FEATURE_X86_CX8 = 0x01010308, - PSNIP_CPU_FEATURE_X86_APIC = 0x01010309, - PSNIP_CPU_FEATURE_X86_SEP = 0x0101030b, - PSNIP_CPU_FEATURE_X86_MTRR = 0x0101030c, - PSNIP_CPU_FEATURE_X86_PGE = 0x0101030d, - PSNIP_CPU_FEATURE_X86_MCA = 0x0101030e, - PSNIP_CPU_FEATURE_X86_CMOV = 0x0101030f, - PSNIP_CPU_FEATURE_X86_PAT = 0x01010310, - PSNIP_CPU_FEATURE_X86_PSE_36 = 0x01010311, - PSNIP_CPU_FEATURE_X86_PSN = 0x01010312, - PSNIP_CPU_FEATURE_X86_CLFSH = 0x01010313, - PSNIP_CPU_FEATURE_X86_DS = 0x01010314, - PSNIP_CPU_FEATURE_X86_ACPI = 0x01010316, - PSNIP_CPU_FEATURE_X86_MMX = 0x01010317, - PSNIP_CPU_FEATURE_X86_FXSR = 0x01010318, - PSNIP_CPU_FEATURE_X86_SSE = 0x01010319, - PSNIP_CPU_FEATURE_X86_SSE2 = 0x0101031a, - PSNIP_CPU_FEATURE_X86_SS = 0x0101031b, - PSNIP_CPU_FEATURE_X86_HTT = 0x0101031c, - PSNIP_CPU_FEATURE_X86_TM = 0x0101031d, - PSNIP_CPU_FEATURE_X86_IA64 = 0x0101031e, - PSNIP_CPU_FEATURE_X86_PBE = 0x0101031f, - - PSNIP_CPU_FEATURE_X86_SSE3 = 0x01010200, - PSNIP_CPU_FEATURE_X86_PCLMULQDQ = 0x01010201, - PSNIP_CPU_FEATURE_X86_DTES64 = 0x01010202, - PSNIP_CPU_FEATURE_X86_MONITOR = 0x01010203, - PSNIP_CPU_FEATURE_X86_DS_CPL = 0x01010204, - PSNIP_CPU_FEATURE_X86_VMX = 0x01010205, - PSNIP_CPU_FEATURE_X86_SMX = 0x01010206, - PSNIP_CPU_FEATURE_X86_EST = 0x01010207, - PSNIP_CPU_FEATURE_X86_TM2 = 0x01010208, - PSNIP_CPU_FEATURE_X86_SSSE3 = 0x01010209, - PSNIP_CPU_FEATURE_X86_CNXT_ID = 0x0101020a, - PSNIP_CPU_FEATURE_X86_SDBG = 0x0101020b, - PSNIP_CPU_FEATURE_X86_FMA = 0x0101020c, - PSNIP_CPU_FEATURE_X86_CX16 = 0x0101020d, - PSNIP_CPU_FEATURE_X86_XTPR = 0x0101020e, - PSNIP_CPU_FEATURE_X86_PDCM = 0x0101020f, - 
PSNIP_CPU_FEATURE_X86_PCID = 0x01010211, - PSNIP_CPU_FEATURE_X86_DCA = 0x01010212, - PSNIP_CPU_FEATURE_X86_SSE4_1 = 0x01010213, - PSNIP_CPU_FEATURE_X86_SSE4_2 = 0x01010214, - PSNIP_CPU_FEATURE_X86_X2APIC = 0x01010215, - PSNIP_CPU_FEATURE_X86_MOVBE = 0x01010216, - PSNIP_CPU_FEATURE_X86_POPCNT = 0x01010217, - PSNIP_CPU_FEATURE_X86_TSC_DEADLINE = 0x01010218, - PSNIP_CPU_FEATURE_X86_AES = 0x01010219, - PSNIP_CPU_FEATURE_X86_XSAVE = 0x0101021a, - PSNIP_CPU_FEATURE_X86_OSXSAVE = 0x0101021b, - PSNIP_CPU_FEATURE_X86_AVX = 0x0101021c, - PSNIP_CPU_FEATURE_X86_F16C = 0x0101021d, - PSNIP_CPU_FEATURE_X86_RDRND = 0x0101021e, - PSNIP_CPU_FEATURE_X86_HYPERVISOR = 0x0101021f, - - PSNIP_CPU_FEATURE_X86_FSGSBASE = 0x01070100, - PSNIP_CPU_FEATURE_X86_TSC_ADJ = 0x01070101, - PSNIP_CPU_FEATURE_X86_SGX = 0x01070102, - PSNIP_CPU_FEATURE_X86_BMI1 = 0x01070103, - PSNIP_CPU_FEATURE_X86_HLE = 0x01070104, - PSNIP_CPU_FEATURE_X86_AVX2 = 0x01070105, - PSNIP_CPU_FEATURE_X86_SMEP = 0x01070107, - PSNIP_CPU_FEATURE_X86_BMI2 = 0x01070108, - PSNIP_CPU_FEATURE_X86_ERMS = 0x01070109, - PSNIP_CPU_FEATURE_X86_INVPCID = 0x0107010a, - PSNIP_CPU_FEATURE_X86_RTM = 0x0107010b, - PSNIP_CPU_FEATURE_X86_PQM = 0x0107010c, - PSNIP_CPU_FEATURE_X86_MPX = 0x0107010e, - PSNIP_CPU_FEATURE_X86_PQE = 0x0107010f, - PSNIP_CPU_FEATURE_X86_AVX512F = 0x01070110, - PSNIP_CPU_FEATURE_X86_AVX512DQ = 0x01070111, - PSNIP_CPU_FEATURE_X86_RDSEED = 0x01070112, - PSNIP_CPU_FEATURE_X86_ADX = 0x01070113, - PSNIP_CPU_FEATURE_X86_SMAP = 0x01070114, - PSNIP_CPU_FEATURE_X86_AVX512IFMA = 0x01070115, - PSNIP_CPU_FEATURE_X86_PCOMMIT = 0x01070116, - PSNIP_CPU_FEATURE_X86_CLFLUSHOPT = 0x01070117, - PSNIP_CPU_FEATURE_X86_CLWB = 0x01070118, - PSNIP_CPU_FEATURE_X86_INTEL_PT = 0x01070119, - PSNIP_CPU_FEATURE_X86_AVX512PF = 0x0107011a, - PSNIP_CPU_FEATURE_X86_AVX512ER = 0x0107011b, - PSNIP_CPU_FEATURE_X86_AVX512CD = 0x0107011c, - PSNIP_CPU_FEATURE_X86_SHA = 0x0107011d, - PSNIP_CPU_FEATURE_X86_AVX512BW = 0x0107011e, - PSNIP_CPU_FEATURE_X86_AVX512VL = 
0x0107011f, - - PSNIP_CPU_FEATURE_X86_PREFETCHWT1 = 0x01070200, - PSNIP_CPU_FEATURE_X86_AVX512VBMI = 0x01070201, - PSNIP_CPU_FEATURE_X86_UMIP = 0x01070202, - PSNIP_CPU_FEATURE_X86_PKU = 0x01070203, - PSNIP_CPU_FEATURE_X86_OSPKE = 0x01070204, - PSNIP_CPU_FEATURE_X86_AVX512VPOPCNTDQ = 0x0107020e, - PSNIP_CPU_FEATURE_X86_RDPID = 0x01070215, - PSNIP_CPU_FEATURE_X86_SGX_LC = 0x0107021e, - - PSNIP_CPU_FEATURE_X86_AVX512_4VNNIW = 0x01070302, - PSNIP_CPU_FEATURE_X86_AVX512_4FMAPS = 0x01070303, - - PSNIP_CPU_FEATURE_ARM_SWP = PSNIP_CPU_FEATURE_ARM | 1, - PSNIP_CPU_FEATURE_ARM_HALF = PSNIP_CPU_FEATURE_ARM | 2, - PSNIP_CPU_FEATURE_ARM_THUMB = PSNIP_CPU_FEATURE_ARM | 3, - PSNIP_CPU_FEATURE_ARM_26BIT = PSNIP_CPU_FEATURE_ARM | 4, - PSNIP_CPU_FEATURE_ARM_FAST_MULT = PSNIP_CPU_FEATURE_ARM | 5, - PSNIP_CPU_FEATURE_ARM_FPA = PSNIP_CPU_FEATURE_ARM | 6, - PSNIP_CPU_FEATURE_ARM_VFP = PSNIP_CPU_FEATURE_ARM | 7, - PSNIP_CPU_FEATURE_ARM_EDSP = PSNIP_CPU_FEATURE_ARM | 8, - PSNIP_CPU_FEATURE_ARM_JAVA = PSNIP_CPU_FEATURE_ARM | 9, - PSNIP_CPU_FEATURE_ARM_IWMMXT = PSNIP_CPU_FEATURE_ARM | 10, - PSNIP_CPU_FEATURE_ARM_CRUNCH = PSNIP_CPU_FEATURE_ARM | 11, - PSNIP_CPU_FEATURE_ARM_THUMBEE = PSNIP_CPU_FEATURE_ARM | 12, - PSNIP_CPU_FEATURE_ARM_NEON = PSNIP_CPU_FEATURE_ARM | 13, - PSNIP_CPU_FEATURE_ARM_VFPV3 = PSNIP_CPU_FEATURE_ARM | 14, - PSNIP_CPU_FEATURE_ARM_VFPV3D16 = PSNIP_CPU_FEATURE_ARM | 15, - PSNIP_CPU_FEATURE_ARM_TLS = PSNIP_CPU_FEATURE_ARM | 16, - PSNIP_CPU_FEATURE_ARM_VFPV4 = PSNIP_CPU_FEATURE_ARM | 17, - PSNIP_CPU_FEATURE_ARM_IDIVA = PSNIP_CPU_FEATURE_ARM | 18, - PSNIP_CPU_FEATURE_ARM_IDIVT = PSNIP_CPU_FEATURE_ARM | 19, - PSNIP_CPU_FEATURE_ARM_VFPD32 = PSNIP_CPU_FEATURE_ARM | 20, - PSNIP_CPU_FEATURE_ARM_LPAE = PSNIP_CPU_FEATURE_ARM | 21, - PSNIP_CPU_FEATURE_ARM_EVTSTRM = PSNIP_CPU_FEATURE_ARM | 22, - - PSNIP_CPU_FEATURE_ARM_AES = PSNIP_CPU_FEATURE_ARM | 0x0100 | 1, - PSNIP_CPU_FEATURE_ARM_PMULL = PSNIP_CPU_FEATURE_ARM | 0x0100 | 2, - PSNIP_CPU_FEATURE_ARM_SHA1 = PSNIP_CPU_FEATURE_ARM | 
0x0100 | 3, - PSNIP_CPU_FEATURE_ARM_SHA2 = PSNIP_CPU_FEATURE_ARM | 0x0100 | 4, - PSNIP_CPU_FEATURE_ARM_CRC32 = PSNIP_CPU_FEATURE_ARM | 0x0100 | 5 -}; - -int psnip_cpu_count (void); -int psnip_cpu_feature_check (enum PSnipCPUFeature feature); -int psnip_cpu_feature_check_many (enum PSnipCPUFeature* feature); - -#if defined(__cplusplus) -} -#endif - -#endif /* PSNIP_CPU__H */ diff --git a/tools/avx-turbo/cpuid.cpp b/tools/avx-turbo/cpuid.cpp deleted file mode 100644 index f96f61f..0000000 --- a/tools/avx-turbo/cpuid.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * cpuid.cpp - */ - -#include "cpuid.hpp" - -#include - -using std::uint8_t; -using std::uint32_t; - - -std::string cpuid_result::to_string() { - std::string s; - s += "eax = " + std::to_string(eax) + ", "; - s += "ebx = " + std::to_string(ebx) + ", "; - s += "ecx = " + std::to_string(ecx) + ", "; - s += "edx = " + std::to_string(edx); - return s; -} - -uint32_t cpuid_highest_leaf_inner() { - return cpuid(0).eax; -} - -uint32_t cpuid_highest_leaf() { - static uint32_t cached = cpuid_highest_leaf_inner(); - return cached; -} - -cpuid_result cpuid(int leaf, int subleaf) { - cpuid_result ret = {}; - asm ("cpuid" - : - "=a" (ret.eax), - "=b" (ret.ebx), - "=c" (ret.ecx), - "=d" (ret.edx) - : - "a" (leaf), - "c" (subleaf) - ); - return ret; -} - -cpuid_result cpuid(int leaf) { - return cpuid(leaf, 0); -} - -family_model gfm_inner() { - auto cpuid1 = cpuid(1); - family_model ret; - ret.family = (cpuid1.eax >> 8) & 0xF; - ret.model = (cpuid1.eax >> 4) & 0xF; - ret.stepping = (cpuid1.eax ) & 0xF; - if (ret.family == 15) { - ret.family += (cpuid1.eax >> 20) & 0xFF; // extended family - } - if (ret.family == 15 || ret.family == 6) { - ret.model += ((cpuid1.eax >> 16) & 0xF) << 4; // extended model - } - return ret; -} - -family_model get_family_model() { - static family_model cached_family_model = gfm_inner(); - return cached_family_model; -} - -std::string get_brand_string() { - auto check = cpuid(0x80000000); - if 
(check.eax < 0x80000004) { - return std::string("unkown (eax =") + std::to_string(check.eax) +")"; - } - std::string ret; - for (uint32_t eax : {0x80000002, 0x80000003, 0x80000004}) { - char buf[17]; - auto fourchars = cpuid(eax); - memcpy(buf + 0, &fourchars.eax, 4); - memcpy(buf + 4, &fourchars.ebx, 4); - memcpy(buf + 8, &fourchars.ecx, 4); - memcpy(buf + 12, &fourchars.edx, 4); - buf[16] = '\0'; - ret += buf; - } - return ret; -} - -/* get bits [start:end] inclusive of the given value */ -uint32_t get_bits(uint32_t value, int start, int end) { - value >>= start; - uint32_t mask = ((uint64_t)-1) << (end - start + 1); - return value & ~mask; -} - -/** - * Get the shift amount for unique physical core IDs - */ -int get_smt_shift() -{ - if (cpuid_highest_leaf() < 0xb) { - return -1; - } - uint32_t smtShift = -1u; - for (uint32_t subleaf = 0; ; subleaf++) { - cpuid_result leafb = cpuid(0xb, subleaf); - uint32_t type = get_bits(leafb.ecx, 8 ,15); - if (!get_bits(leafb.ebx,0,15) || type == 0) { - // done - break; - } - if (type == 1) { - // here's the value we are after: make sure we don't have more than one entry for - // this type though! 
- if (smtShift != -1u) { - fprintf(stderr, "Warning: more than one level of type 1 in the x2APIC hierarchy"); - } - smtShift = get_bits(leafb.eax, 0, 4); - } - } - return smtShift; -} - diff --git a/tools/avx-turbo/cpuid.hpp b/tools/avx-turbo/cpuid.hpp deleted file mode 100644 index 7d6802d..0000000 --- a/tools/avx-turbo/cpuid.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * cpuid.hpp - */ - -#ifndef CPUID_HPP_ -#define CPUID_HPP_ - -#include -#include - -struct cpuid_result { - std::uint32_t eax, ebx, ecx, edx; - std::string to_string(); -}; - -struct family_model { - uint8_t family; - uint8_t model; - uint8_t stepping; - std::string to_string() { - std::string s; - s += "family = " + std::to_string(family) + ", "; - s += "model = " + std::to_string(model) + ", "; - s += "stepping = " + std::to_string(stepping); - return s; - } -}; - - -/** the highest supported leaf value */ -uint32_t cpuid_highest_leaf(); - -/* return the CPUID result for querying the given leaf (EAX) and no subleaf (ECX=0) */ -cpuid_result cpuid(int leaf); - -/* return the CPUID result for querying the given leaf (EAX) and subleaf (ECX) */ -cpuid_result cpuid(int leaf, int subleaf); - -family_model get_family_model(); - -std::string get_brand_string(); - -int get_smt_shift(); - -/* get bits [start:end] inclusive of the given value */ -uint32_t get_bits(uint32_t value, int start, int end); - -#endif /* CPUID_HPP_ */ diff --git a/tools/avx-turbo/exact-int.h b/tools/avx-turbo/exact-int.h deleted file mode 100644 index 918b8f7..0000000 --- a/tools/avx-turbo/exact-int.h +++ /dev/null @@ -1,229 +0,0 @@ -/* Exact-width integer types - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * This header tries to define psnip_(u)int(8|16|32|64)_t to - * appropriate types given your system. For most systems this means - * including and adding a few preprocessor definitions. - * - * If you prefer, you can define any necessary types yourself. - * Snippets in this repository which rely on these types will not - * attempt to include this header if you have already defined the - * types it uses. - */ - -#if !defined(PSNIP_EXACT_INT_H) -# define PSNIP_EXACT_INT_H -# if !defined(PSNIP_EXACT_INT_HAVE_STDINT) -# if defined(_STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -# define PSNIP_EXACT_INT_HAVE_STDINT -# elif defined(__has_include) -# if __has_include() -# define PSNIP_EXACT_INT_HAVE_STDINT -# endif -# elif \ - defined(HAVE_STDINT_H) || \ - defined(_STDINT_H_INCLUDED) || \ - defined(_STDINT_H) || \ - defined(_STDINT_H_) -# define PSNIP_EXACT_INT_HAVE_STDINT -# elif \ - (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || \ - (defined(_MSC_VER) && (_MSC_VER >= 1600)) || \ - (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x570)) || \ - (defined(__WATCOMC__) && (__WATCOMC__ >= 1250)) -# define PSNIP_EXACT_INT_HAVE_STDINT -# endif -# endif - -# if \ - defined(__INT8_TYPE__) && defined(__INT16_TYPE__) && defined(__INT32_TYPE__) && defined(__INT64_TYPE__) && \ - defined(__UINT8_TYPE__) && defined(__UINT16_TYPE__) && defined(__UINT32_TYPE__) && defined(__UINT64_TYPE__) -# define psnip_int8_t __INT8_TYPE__ -# define psnip_int16_t __INT16_TYPE__ -# define psnip_int32_t __INT32_TYPE__ -# define psnip_int64_t __INT64_TYPE__ -# define psnip_uint8_t __UINT8_TYPE__ -# define psnip_uint16_t __UINT16_TYPE__ -# define psnip_uint32_t __UINT32_TYPE__ -# define psnip_uint64_t __UINT64_TYPE__ -# elif defined(PSNIP_EXACT_INT_HAVE_STDINT) -# include -# if !defined(psnip_int8_t) -# define psnip_int8_t int8_t -# endif -# if 
!defined(psnip_uint8_t) -# define psnip_uint8_t uint8_t -# endif -# if !defined(psnip_int16_t) -# define psnip_int16_t int16_t -# endif -# if !defined(psnip_uint16_t) -# define psnip_uint16_t uint16_t -# endif -# if !defined(psnip_int32_t) -# define psnip_int32_t int32_t -# endif -# if !defined(psnip_uint32_t) -# define psnip_uint32_t uint32_t -# endif -# if !defined(psnip_int64_t) -# define psnip_int64_t int64_t -# endif -# if !defined(psnip_uint64_t) -# define psnip_uint64_t uint64_t -# endif -# elif defined(_MSC_VER) -# if !defined(psnip_int8_t) -# define psnip_int8_t __int8 -# endif -# if !defined(psnip_uint8_t) -# define psnip_uint8_t unsigned __int8 -# endif -# if !defined(psnip_int16_t) -# define psnip_int16_t __int16 -# endif -# if !defined(psnip_uint16_t) -# define psnip_uint16_t unsigned __int16 -# endif -# if !defined(psnip_int32_t) -# define psnip_int32_t __int32 -# endif -# if !defined(psnip_uint32_t) -# define psnip_uint32_t unsigned __int32 -# endif -# if !defined(psnip_int64_t) -# define psnip_int64_t __int64 -# endif -# if !defined(psnip_uint64_t) -# define psnip_uint64_t unsigned __int64 -# endif -# else -# include -# if !defined(psnip_int8_t) -# if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-127-1)) && (CHAR_MAX == 127) -# define psnip_int8_t char -# elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-127-1)) && (SHRT_MAX == 127) -# define psnip_int8_t short -# elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-127-1)) && (INT_MAX == 127) -# define psnip_int8_t int -# elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-127-1)) && (LONG_MAX == 127) -# define psnip_int8_t long -# elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-127-1)) && (LLONG_MAX == 127) -# define psnip_int8_t long long -# else -# error Unable to locate 8-bit signed integer type. 
-# endif -# endif -# if !defined(psnip_uint8_t) -# if defined(UCHAR_MAX) && (UCHAR_MAX == 255) -# define psnip_uint8_t unsigned char -# elif defined(USHRT_MAX) && (USHRT_MAX == 255) -# define psnip_uint8_t unsigned short -# elif defined(UINT_MAX) && (UINT_MAX == 255) -# define psnip_uint8_t unsigned int -# elif defined(ULONG_MAX) && (ULONG_MAX == 255) -# define psnip_uint8_t unsigned long -# elif defined(ULLONG_MAX) && (ULLONG_MAX == 255) -# define psnip_uint8_t unsigned long long -# else -# error Unable to locate 8-bit unsigned integer type. -# endif -# endif -# if !defined(psnip_int16_t) -# if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-32767-1)) && (CHAR_MAX == 32767) -# define psnip_int16_t char -# elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-32767-1)) && (SHRT_MAX == 32767) -# define psnip_int16_t short -# elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-32767-1)) && (INT_MAX == 32767) -# define psnip_int16_t int -# elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-32767-1)) && (LONG_MAX == 32767) -# define psnip_int16_t long -# elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-32767-1)) && (LLONG_MAX == 32767) -# define psnip_int16_t long long -# else -# error Unable to locate 16-bit signed integer type. -# endif -# endif -# if !defined(psnip_uint16_t) -# if defined(UCHAR_MAX) && (UCHAR_MAX == 65535) -# define psnip_uint16_t unsigned char -# elif defined(USHRT_MAX) && (USHRT_MAX == 65535) -# define psnip_uint16_t unsigned short -# elif defined(UINT_MAX) && (UINT_MAX == 65535) -# define psnip_uint16_t unsigned int -# elif defined(ULONG_MAX) && (ULONG_MAX == 65535) -# define psnip_uint16_t unsigned long -# elif defined(ULLONG_MAX) && (ULLONG_MAX == 65535) -# define psnip_uint16_t unsigned long long -# else -# error Unable to locate 16-bit unsigned integer type. 
-# endif -# endif -# if !defined(psnip_int32_t) -# if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-2147483647-1)) && (CHAR_MAX == 2147483647) -# define psnip_int32_t char -# elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-2147483647-1)) && (SHRT_MAX == 2147483647) -# define psnip_int32_t short -# elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-2147483647-1)) && (INT_MAX == 2147483647) -# define psnip_int32_t int -# elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-2147483647-1)) && (LONG_MAX == 2147483647) -# define psnip_int32_t long -# elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-2147483647-1)) && (LLONG_MAX == 2147483647) -# define psnip_int32_t long long -# else -# error Unable to locate 32-bit signed integer type. -# endif -# endif -# if !defined(psnip_uint32_t) -# if defined(UCHAR_MAX) && (UCHAR_MAX == 4294967295) -# define psnip_uint32_t unsigned char -# elif defined(USHRT_MAX) && (USHRT_MAX == 4294967295) -# define psnip_uint32_t unsigned short -# elif defined(UINT_MAX) && (UINT_MAX == 4294967295) -# define psnip_uint32_t unsigned int -# elif defined(ULONG_MAX) && (ULONG_MAX == 4294967295) -# define psnip_uint32_t unsigned long -# elif defined(ULLONG_MAX) && (ULLONG_MAX == 4294967295) -# define psnip_uint32_t unsigned long long -# else -# error Unable to locate 32-bit unsigned integer type. 
-# endif -# endif -# if !defined(psnip_int64_t) -# if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-9223372036854775807LL-1)) && (CHAR_MAX == 9223372036854775807LL) -# define psnip_int64_t char -# elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-9223372036854775807LL-1)) && (SHRT_MAX == 9223372036854775807LL) -# define psnip_int64_t short -# elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-9223372036854775807LL-1)) && (INT_MAX == 9223372036854775807LL) -# define psnip_int64_t int -# elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-9223372036854775807LL-1)) && (LONG_MAX == 9223372036854775807LL) -# define psnip_int64_t long -# elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-9223372036854775807LL-1)) && (LLONG_MAX == 9223372036854775807LL) -# define psnip_int64_t long long -# else -# error Unable to locate 64-bit signed integer type. -# endif -# endif -# if !defined(psnip_uint64_t) -# if defined(UCHAR_MAX) && (UCHAR_MAX == 18446744073709551615ULL) -# define psnip_uint64_t unsigned char -# elif defined(USHRT_MAX) && (USHRT_MAX == 18446744073709551615ULL) -# define psnip_uint64_t unsigned short -# elif defined(UINT_MAX) && (UINT_MAX == 18446744073709551615ULL) -# define psnip_uint64_t unsigned int -# elif defined(ULONG_MAX) && (ULONG_MAX == 18446744073709551615ULL) -# define psnip_uint64_t unsigned long -# elif defined(ULLONG_MAX) && (ULLONG_MAX == 18446744073709551615ULL) -# define psnip_uint64_t unsigned long long -# else -# error Unable to locate 64-bit unsigned integer type. 
-# endif -# endif -# endif -#endif diff --git a/tools/avx-turbo/msr-access.c b/tools/avx-turbo/msr-access.c deleted file mode 100644 index 902aee4..0000000 --- a/tools/avx-turbo/msr-access.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * msr-access.c - */ - -// for pread() and sched_getcpu() -#define _GNU_SOURCE - -#include "msr-access.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/** if there are this many CPUs or less, we'll never allocate memory */ -#define STATIC_ARRAY_SIZE 32 - -#ifndef MSR_USE_PTHREADS -// thread-safe by default -#define MSR_USE_PTHREADS 1 -#endif - -#if MSR_USE_PTHREADS -#include -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -void lock() { - pthread_mutex_lock(&mutex); -} -void unlock() { - pthread_mutex_unlock(&mutex); -} -#else -void lock() {} -void unlock(){} -#endif - - - -/* size of the rfile array */ -int rfile_static[STATIC_ARRAY_SIZE] = {}; -int rfile_size = STATIC_ARRAY_SIZE; -int *rfile_array = rfile_static; -//int rfile_error; - -/** get the read-only file associated with the given cpu */ -int get_rfile(int cpu) { - assert(cpu >= 0); - - lock(); - - if (cpu >= rfile_size) { - // expand array - size_t new_size = rfile_size * 2 > cpu ? 
rfile_size * 2 : cpu; - int *new_array = calloc(new_size, sizeof(int)); - memcpy(new_array, rfile_array, rfile_size * sizeof(int)); - if (rfile_array != rfile_static) { - free(rfile_array); - } - rfile_array = new_array; - rfile_size = new_size; - } - - if (rfile_array[cpu] == 0) { - char filename[64] = {}; - int ret = snprintf(filename, 64, "/dev/cpu/%d/msr", cpu); - assert(ret > 0); - rfile_array[cpu] = open(filename, O_RDONLY); - if (rfile_array[cpu] == -1) { - rfile_array[cpu] = -errno; - } - } - - int ret = rfile_array[cpu]; - - unlock(); - - return ret; -} - -int read_msr(int cpu, uint32_t msr_index, uint64_t* value) { - int file = get_rfile(cpu); - assert(file); - if (file < 0) { - // file open failes are stored as negative errno - return file; - } - int read = pread(file, value, 8, msr_index); - return read == -1 ? errno : 0; -} - -int read_msr_cur_cpu(uint32_t msr_index, uint64_t* value) { - return read_msr(sched_getcpu(), msr_index, value); -} - - -// rename this to main to build an exe that can be run as ./a.out CPU MSR -// to read MSR from CPU (like a really simple rdmsr) -int test(int argc, char** argv) { - assert(argc == 3); - int cpu = atoi(argv[1]); - uint32_t msr = atoi(argv[2]); - printf("reading msr %u from cpu %d\n", msr, cpu); - uint64_t value = -1; - - int res = read_msr(cpu, msr, &value); - if (res) { - printf("error %d\n", res); - } else { - printf("value %lx\n", value); - } - - res = read_msr_cur_cpu(msr, &value); - if (res) { - printf("error %d\n", res); - } else { - printf("value %lx\n", value); - } - - return EXIT_SUCCESS; -} - - diff --git a/tools/avx-turbo/msr-access.h b/tools/avx-turbo/msr-access.h deleted file mode 100644 index 003a06f..0000000 --- a/tools/avx-turbo/msr-access.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * msr-access.h - * - * Simple API to access the x86 MSR registers exposed on linux with through the /dev/cpu/N/msr file system. - * - * Unless you've changed the msr permissions, only root can do this. 
The msr filesystem may not exist until - * 'modprobe msr' is executed to load the msr module. - */ - -#ifndef MSR_ACCESS_H_ -#define MSR_ACCESS_H_ - -#include -// you could get the MSR index values from the following header, although it isn't exported to user-space -// in kernels after 4.12, but you can grab it from the linux source -// #include - -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * Read the MSR given by msr_index on the given cpu, storing the result into - * result, which must point to at least 8 bytes of storage. - * - * Returns zero on success, non-zero on failure. - * - * Negative values indicate errors - * opening the underlying MSR file: the value returned is the negative of the errno - * returned by the kernel when trying to open the file. These file errors is cached - * so once a negative value has been returned for a given cpu, subsequent calls will - * always return the same value. - * - * Positive values indicate failures during the pread call performed to actually read - * the msr from the open file. The value is the errno returned by the kernel after the - * read. The most common value is 5 (EIO) which indicates that you can't read that MSR - * on this hardware (e.g., if may not exist). - */ -int read_msr(int cpu, uint32_t msr_index, uint64_t* value); - -/** - * Reads the given MSR on the current CPU. This is just a shortcut for calling - * read_msr(sched_getcpu(), ...), and the result and error handling is the same as that function. - * - * Of course, unless the thread affinity has been restricted for the current thread, - * the result doesn't help the calling code know the true value on the current CPU since - * a context switch can happen at any time (the same caveat applies to getcpu()). 
- */ -int read_msr_cur_cpu(uint32_t msr_index, uint64_t* value); - - -#ifdef __cplusplus -} // extern "C" { -#endif - -#endif // #ifdef MSR_ACCESS_H_ diff --git a/tools/avx-turbo/nasm-2.13.03/LICENSE b/tools/avx-turbo/nasm-2.13.03/LICENSE deleted file mode 100644 index 331e260..0000000 --- a/tools/avx-turbo/nasm-2.13.03/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -NASM is now licensed under the 2-clause BSD license, also known as the -simplified BSD license. - - Copyright 1996-2010 the NASM Authors - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following - conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/tools/avx-turbo/nasm-2.13.03/NOTE b/tools/avx-turbo/nasm-2.13.03/NOTE deleted file mode 100644 index dc67373..0000000 --- a/tools/avx-turbo/nasm-2.13.03/NOTE +++ /dev/null @@ -1,3 +0,0 @@ -This stable version of the nasm binary is here in avx-turbo since many version of nasm included in distributions are -capable of compiling AVX-512 (2.11 is required at a minimum). So we include the binary here along with the LICENSE -(which allows such binary distribution). diff --git a/tools/avx-turbo/nasm-2.13.03/nasm b/tools/avx-turbo/nasm-2.13.03/nasm deleted file mode 100755 index 3da333e..0000000 Binary files a/tools/avx-turbo/nasm-2.13.03/nasm and /dev/null differ diff --git a/tools/avx-turbo/nasm-utils-helper.c b/tools/avx-turbo/nasm-utils-helper.c deleted file mode 100644 index aa4e38b..0000000 --- a/tools/avx-turbo/nasm-utils-helper.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * nasm-utils-helper.c - * - * C helper functions for some macros in nasm-utils-inc.asm. - * - * If you use any macros that require functionality defined here, just include this C file in - * your project (linked against the same object that contains the assembly generated with the - * help of nasm-utils-inc.asm). 
- */ - -#include -#include -#include -#include - - -// mapping from reg_id to register name -static const char *reg_names[] = { - "rbp", - "rbx", - "r12", - "r13", - "r14", - "r15" -}; - -/* called when a function using abi_checked_function detects an illegally clobbered register */ -void nasm_util_die_on_reg_clobber(const char *fname, unsigned reg_id) { - reg_id--; // reg ids are 1-based - if (reg_id >= sizeof(sizeof(reg_names)/sizeof(reg_names[0]))) { - fprintf(stderr, "FATAL: function %s clobbered a callee-saved register (thunk returned an invalid reg_id %d)\n", - fname, reg_id); - } else { - fprintf(stderr, "FATAL: function %s clobbered callee-saved register %s\n", fname, reg_names[reg_id]); - } - abort(); -} - -void nasm_util_assert_failed(const char *left, const char *right, const char *filename, int64_t line) { - fprintf(stderr, "%s:%ld : Assertion failed: %s == %s\n", filename, (long)line, left, right); - fflush(stderr); - abort(); -} diff --git a/tools/avx-turbo/nasm-utils-inc.asm b/tools/avx-turbo/nasm-utils-inc.asm deleted file mode 100644 index e87c02c..0000000 --- a/tools/avx-turbo/nasm-utils-inc.asm +++ /dev/null @@ -1,210 +0,0 @@ -;; potentially useful macros for asm development - -;; long-nop instructions: nopX inserts a nop of X bytes -;; see "Table 4-12. 
Recommended Multi-Byte Sequence of NOP Instruction" in -;; "Intelยฎ 64 and IA-32 Architectures Software Developerโ€™s Manual" (325383-061US) -%define nop1 nop ; just a nop, included for completeness -%define nop2 db 0x66, 0x90 ; 66 NOP -%define nop3 db 0x0F, 0x1F, 0x00 ; NOP DWORD ptr [EAX] -%define nop4 db 0x0F, 0x1F, 0x40, 0x00 ; NOP DWORD ptr [EAX + 00H] -%define nop5 db 0x0F, 0x1F, 0x44, 0x00, 0x00 ; NOP DWORD ptr [EAX + EAX*1 + 00H] -%define nop6 db 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 ; 66 NOP DWORD ptr [EAX + EAX*1 + 00H] -%define nop7 db 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 ; NOP DWORD ptr [EAX + 00000000H] -%define nop8 db 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 ; NOP DWORD ptr [EAX + EAX*1 + 00000000H] -%define nop9 db 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 ; 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] - -;; push the 6 callee-saved registers defined in the the SysV C ABI -%macro push_callee_saved 0 -push rbp -push rbx -push r12 -push r13 -push r14 -push r15 -%endmacro - -;; pop the 6 callee-saved registers in the order compatible with push_callee_saved -%macro pop_callee_saved 0 -pop r15 -pop r14 -pop r13 -pop r12 -pop rbx -pop rbp -%endmacro - -EXTERN nasm_util_assert_failed - -; place the string value of a tken in .rodata using %defstr -; arg1 - the token to make into a string -; arg2 - label which will point to the string -%macro make_string_tok 2 -%ifdef __YASM_MAJOR__ -; yasm has no support for defstr so we just use a fixed string for now -; see https://github.com/travisdowns/nasm-utils/issues/1 -make_string 'make_string_tok yasm bug', %2 -%else -%defstr make_string_temp %1 -make_string make_string_temp, %2 -%endif -%endmacro - -%macro make_string 2 -[section .rodata] -%2: -db %1,0 -; restore the previous section -__SECT__ -%endmacro - -%macro nasm_util_assert_boilerplate 0 -make_string __FILE__, parent_filename -%define ASSERT_BOILERPLATE 1 -%endmacro - -%macro check_assert_boilerplate 0 -%ifndef ASSERT_BOILERPLATE -%error "To 
use asserts, you must include a call to nasm_util_assert_boilerplate once in each file" -%endif -%endmacro - -;; assembly level asserts -;; if the assert occurs, termination is assumed so control never passes back to the caller -;; and registers are not preserved -%macro assert_eq 2 -check_assert_boilerplate -cmp %1, %2 -je %%assert_ok -make_string_tok %1, %%assert_string1 -make_string_tok %2, %%assert_string2 -lea rdi, [%%assert_string1] -lea rsi, [%%assert_string2] -lea rdx, [parent_filename] -mov rcx, __LINE__ -jmp nasm_util_assert_failed -%%assert_ok: -%endmacro - -;; boilerplate needed once when abi_checked_function is used -%macro thunk_boilerplate 0 -; this function is defined by the C helper code -EXTERN nasm_util_die_on_reg_clobber - -boil1 rbp, 1 -boil1 rbx, 2 -boil1 r12, 3 -boil1 r13, 4 -boil1 r14, 5 -boil1 r15, 6 -%endmacro - -;; By default, the "assert-like" features that can be conditionally enabled key off the value of the -;; NDEBUG macro: if it is defined, the slower, more heavily checked paths are enabled, otherwise they -;; are omitted (usually resulting in zero additional cost). -;; -;; If you don't want to rely on NDEBUG can specifically enable or disable the debug mode with the -;; NASM_ENABLE_DEBUG set to 0 (equivalent to NDEBUG set) or 1 (equivalent to NDEBUG not set) -%ifndef NASM_ENABLE_DEBUG - %ifdef NDEBUG - %define NASM_ENABLE_DEBUG 0 - %else - %define NASM_ENABLE_DEBUG 1 - %endif -%elif (NASM_ENABLE_DEBUG != 0) && (NASM_ENABLE_DEBUG != 1) - %error bad value for 'NASM_ENABLE_DEBUG': should be 0 or 1 but was NASM_ENABLE_DEBUG -%endif - - - - -;; This macro supports declaring a "ABI-checked" function in asm -;; An ABI-checked function will checked at each invocation for compliance with the SysV ABI -;; rules about callee saved registers. 
In particular, from the ABI cocument we have the following: -;; -;; Registers %rbp, %rbx and %r12 through %r15 โ€œbelongโ€ to the calling function -;; and the called function is required to preserve their values. -;; (from "System V Application Binary Interface, AMD64 Architecture Processor Supplement") -;; -;; -%macro abi_checked_function 1 -GLOBAL %1:function - -%1: - -%if NASM_ENABLE_DEBUG != 0 - -;%warning compiling ABI checks - -; save all the callee-saved regs -push_callee_saved -push rax ; dummy push to align the stack (before we have rsp % 16 == 8) -call %1_inner -add rsp, 8 ; undo dummy push - -; load the function name (ok to clobber rdi since it's callee-saved) -mov rdi, %1_thunk_fn_name - -; now check whether any regs were clobbered -cmp rbp, [rsp + 40] -jne bad_rbp -cmp rbx, [rsp + 32] -jne bad_rbx -cmp r12, [rsp + 24] -jne bad_r12 -cmp r13, [rsp + 16] -jne bad_r13 -cmp r14, [rsp + 8] -jne bad_r14 -cmp r15, [rsp] -jne bad_r15 - -add rsp, 6 * 8 -ret - - -; here we store strings needed by the failure cases, in the .rodata section -[section .rodata] -%1_thunk_fn_name: -%ifdef __YASM_MAJOR__ -; yasm doesn't support defstr, so for now just use an unknown name -db "unknown (see yasm issue #95)",0 -%else -%defstr fname %1 -db fname,0 -%endif - -; restore the previous section -__SECT__ - -%1_inner: -%endif ; debug off, just assemble the function as-is without any checks - -%endmacro - - -;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IMPLEMENTATION FOLLOWS -;; below you find internal macros needed for the implementation of the above macros -;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; generate the stubs for the bad_reg functions called from the check-abi thunk -%macro boil1 2 -bad_%1: -; A thunk has determined that a reg was clobbered -; each reg has their own bad_ function which moves the function name (in rdx) into -; rdi and loads a constant indicating which reg was involved and calls a C routine -; that will do the rest (abort the program generall). 
We follow up with an ud2 in case -; the C routine returns, since this mechanism is not designed for recovery. -mov rsi, %2 -; here we set up a stack frame - this gives a meaningful backtrace in any core file produced by the abort -; first we need to pop the saved regs off the stack so the rbp chain is consistent -add rsp, 6 * 8 -push rbp -mov rbp, rsp -call nasm_util_die_on_reg_clobber -ud2 -%endmacro - - - - diff --git a/tools/avx-turbo/once.h b/tools/avx-turbo/once.h deleted file mode 100644 index 586bcac..0000000 --- a/tools/avx-turbo/once.h +++ /dev/null @@ -1,128 +0,0 @@ -/* Once (v1) - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - */ - -#if !defined(PSNIP_ONCE__H) -#define PSNIP_ONCE__H - -#define PSNIP_ONCE__BACKEND_ATOMIC 1 -#define PSNIP_ONCE__BACKEND_PTHREAD 2 -#define PSNIP_ONCE__BACKEND_NONE 3 -#define PSNIP_ONCE__BACKEND_C11 11 -#define PSNIP_ONCE__BACKEND_WIN32 32 - -#if !defined(PSNIP_ONCE_BACKEND) -# if defined(__STDC_NO_THREADS__) && __STDC_NO_THREADS__ -# elif defined(__EMSCRIPTEN__) -# elif defined(__has_include) -# if __has_include() -# include -# define PSNIP_ONCE_BACKEND PSNIP_ONCE__BACKEND_C11 -# endif -# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201102L) && !defined(__STDC_NO_THREADS__) -# include -# if defined(__STDC_NO_THREADS__) || (defined(__GNUC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 16))) -/* stdc-predef.h didn't include __STDC_NO_THREADS__ until 2.16. icc - doesn't include stdc-predef.h until we pull in limits.h, so the - first check doesn't work. 
*/ -# else -# include -# define PSNIP_ONCE_BACKEND PSNIP_ONCE__BACKEND_C11 -# endif -# endif -#endif - -#if !defined(PSNIP_ONCE_BACKEND) && defined(_WIN32) && (!defined(WINVER) || (defined(WINVER) && (WINVER >= 0x0600))) -# include -# define PSNIP_ONCE_BACKEND PSNIP_ONCE__BACKEND_WIN32 -#endif - -#if !defined(PSNIP_ONCE_BACKEND) && defined(PTHREAD_ONCE_INIT) -# define PSNIP_ONCE_BACKEND PSNIP_ONCE__BACKEND_PTHREAD -#endif - -#if !defined(PSNIP_ONCE_BACKEND) -# include "atomic.h" -# if !defined(PSNIP_ATOMIC_NOT_FOUND) -# define PSNIP_ONCE_BACKEND PSNIP_ONCE__BACKEND_ATOMIC -# endif -#endif - -#if !defined(PSNIP_ONCE_BACKEND) -# error No once backend found. -#endif - -#if defined(__GNUC__) && (__GNUC__ >= 3) -# define PSNIP_ONCE__UNLIKELY(expr) __builtin_expect(!!(expr), !!0) -#else -# define PSNIP_ONCE__UNLIKELY(expr) (!!(expr)) -#endif - -#if PSNIP_ONCE_BACKEND == PSNIP_ONCE__BACKEND_C11 -# define PSNIP_ONCE_INIT ONCE_FLAG_INIT -typedef once_flag psnip_once; -# define psnip_once_call(flag, func) call_once(flag, func) -#elif PSNIP_ONCE_BACKEND == PSNIP_ONCE__BACKEND_PTHREAD -# define PSNIP_ONCE_INIT PTHREAD_ONCE_INIT -typedef pthread_once_t psnip_once; -# define psnip_once_call(flag, func) pthread_once(flag, func) -#elif PSNIP_ONCE_BACKEND == PSNIP_ONCE__BACKEND_WIN32 -# define PSNIP_ONCE_INIT INIT_ONCE_STATIC_INIT -typedef INIT_ONCE psnip_once; -static BOOL CALLBACK psnip_once__callback_wrap(INIT_ONCE* InitOnce, void* Parameter, void** Context) { - (void) Context; - (void) InitOnce; -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable:4055) -#endif - ((void (*)(void)) Parameter)(); -#if defined(_MSC_VER) -# pragma warning(pop) -#endif - return !0; -} -# if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define psnip_once_call(flag, func) \ - __pragma(warning(push)) \ - __pragma(warning(disable:4152)) \ - InitOnceExecuteOnce(flag, &psnip_once__callback_wrap, func, NULL) \ - __pragma(warning(pop)) -# else -# define psnip_once_call(flag, func) 
InitOnceExecuteOnce(flag, &psnip_once__callback_wrap, func, NULL) -# endif -#elif PSNIP_ONCE_BACKEND == PSNIP_ONCE__BACKEND_ATOMIC -# define PSNIP_ONCE_INIT PSNIP_ATOMIC_VAR_INIT(0) -typedef psnip_atomic_int32 psnip_once; -static void psnip_once_call(psnip_once* flag, void (*func)(void)) { - psnip_int32_t state = psnip_atomic_int32_load(flag); - if (PSNIP_ONCE__UNLIKELY(state == 0)) { - if (psnip_atomic_int32_compare_exchange(flag, &state, 1)) { - func(); - psnip_atomic_int32_store(flag, 2); - } else { - do { - /* Spin; another thread is calling the initialization - function. */ - } while (psnip_atomic_int32_load(flag) == 1); - } - } -} -#elif PSNIP_ONCE_BACKEND == PSNIP_ONCE__BACKEND_NONE -# define PSNIP_ONCE_INIT 0 -typedef int psnip_once; -static void psnip_once_call(psnip_once* flag, void (*func)(void)) { - if (*flag == 0) { - func(); - *flag = 1; - } -} -#endif - -#endif /* !defined(PSNIP_ONCE__H) */ diff --git a/tools/avx-turbo/stats.hpp b/tools/avx-turbo/stats.hpp deleted file mode 100644 index ab938be..0000000 --- a/tools/avx-turbo/stats.hpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Really simple descriptive stats. - * - * stats.hpp - */ - -#ifndef STATS_HPP_ -#define STATS_HPP_ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Stats { - -class DescriptiveStats { - double min_, max_, avg_, median_; - size_t count_; -public: - DescriptiveStats(double min, double max, double avg, double median, size_t count) : - min_(min), max_(max), avg_(avg), median_(median), count_(count) {} - - double getAvg() const { - return avg_; - } - - size_t getCount() const { - return count_; - } - - double getMax() const { - return max_; - } - - double getMin() const { - return min_; - } - - double getMedian() const { - return median_; - } - - /* - * Return a string with the values of min/median/avg/max at the specified precision. - * Note that the count is not included. 
- */ - std::string getString4(int width, int precision) const { - std::ostringstream os; - os << std::fixed << std::setprecision(precision) << - std::setw(width) << getMin() << "/" << - std::setw(width) << getMedian() << "/" << - std::setw(width) << getAvg() << "/" << - std::setw(width) << getMax(); - - return os.str(); - } -}; - -template -typename std::iterator_traits::value_type median(iter_type first, iter_type last, LESS comp) { - if (first == last) { - throw std::logic_error("can't get median of empty range"); - } - using T = typename std::iterator_traits::value_type; - std::vector copy(first, last); - std::sort(copy.begin(), copy.end(), comp); - size_t sz = copy.size(), half_sz = sz / 2; - return sz % 2 ? copy[half_sz] : (copy[half_sz - 1] + copy[half_sz]) / 2; -} - -/** - * Like median above, except that with an even number of elements, where there are two middle elements with - * equal claim to the throne, the lesser of the two elements is returned rather trying to average them. This - * method is more generally applicable since it always returns on of the elements of the input range directly - * and doesn't require the elements to expose the operations required to calculate an average. 
- */ -template -typename std::iterator_traits::value_type medianf(iter_type first, iter_type last, LESS comp) { - if (first == last) { - throw std::logic_error("can't get median of empty range"); - } - using T = typename std::iterator_traits::value_type; - std::vector copy(first, last); - std::sort(copy.begin(), copy.end(), comp); - assert(!copy.empty()); - return copy[(copy.size() - 1) / 2]; -} - -template -typename std::iterator_traits::value_type median(iter_type first, iter_type last) { - auto p = std::less::value_type>(); - return Stats::median(first, last, p); -} - - -template -DescriptiveStats get_stats(iter_type first, iter_type last) { - using dlimits = std::numeric_limits; - double min = dlimits::max(), max = dlimits::min(), total = 0; - size_t count = 0; - for (iter_type itr = first; itr != last; itr++) { - auto val = *itr; - double vald = val; - if (vald < min) min = vald; - if (vald > max) max = vald; - total += vald; - count++; - } - - return DescriptiveStats(min, max, total / count, median(first, last), count); -} - - - -inline std::ostream& operator<<(std::ostream &os, const DescriptiveStats &stats) { - os << "min=" << stats.getMin() << ", median=" << stats.getMedian() << ", avg=" << stats.getAvg() - << ", max=" << stats.getMax() << ", n=" << stats.getCount(); - return os; -} - -} // namepsace Stats - -#endif /* STATS_HPP_ */ diff --git a/tools/avx-turbo/table.hpp b/tools/avx-turbo/table.hpp deleted file mode 100644 index 9c65fd9..0000000 --- a/tools/avx-turbo/table.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * table.hpp - * - * Simple tabular output. - */ - -#ifndef TABLE_HPP_ -#define TABLE_HPP_ - -#include -#include -#include -#include -#include -#include - -namespace table { - -/* - * Given a printf-style format and args, return the formatted string as a std::string. - * - * See https://stackoverflow.com/a/26221725/149138. - */ -template -std::string string_format(const std::string& format, Args ... 
args) { - size_t size = snprintf( nullptr, 0, format.c_str(), args ... ) + 1; // Extra space for '\0' - std::unique_ptr buf( new char[ size ] ); - snprintf( buf.get(), size, format.c_str(), args ... ); - return std::string( buf.get(), buf.get() + size - 1 ); // We don't want the '\0' inside -} - -class Table; - -struct ColInfo { - enum Justification { LEFT, RIGHT } justify; - ColInfo() : justify(LEFT) {} -}; - -class Row { - friend Table; - using row_t = std::vector; - - const Table* table_; - row_t elems_; - - Row(const Table& table) : table_(&table) {} - - /** return a vector of sizes for each element */ - std::vector getSizes() const { - std::vector sizes; - for (const auto& e : elems_) { - sizes.push_back(e.size()); - } - return sizes; - } - - inline void str(std::ostream& os, const std::vector sizes) const; - - std::string justify(const ColInfo& cinfo, const std::string& e, size_t w) const { - // left pad - std::stringstream ss; - ss << std::setw(w) << (cinfo.justify == ColInfo::LEFT ? std::left : std::right) << e; - auto s = ss.str(); - assert(s.size() == w); - return s; - } - -public: - /** add a cell to this row with the given element, returns a reference to this row */ - template - Row& add(const T& elem) { - std::stringstream ss; - ss << elem; - elems_.push_back(ss.str()); - return *this; - } - - /** - * Add a formatted cell to this row with the given element. - * The format is a printf-style format string and any additional arguments are the format arguments. - * Returns a reference to this row. - */ - template - Row& addf(const char* format, Args ... 
args) { - elems_.push_back(string_format(format, args...)); - return *this; - } - - /** the number of elements currently in the row */ - size_t size() { - return elems_.size(); - } -}; - -class Table { - friend Row; - using table_t = std::vector; - using colinfo_t = std::vector; - - table_t rows_; - colinfo_t colinfo_; - std::string sep; - -public: - - Table() : sep(" ") {} - - /** - * Get a reference to the ColInfo object for the given column, which lets you - * set column-global info such as the justification. - */ - ColInfo& colInfo(size_t col) { - if (col >= colinfo_.size()) { - colinfo_.resize(col + 1); - } - return colinfo_.at(col); - } - - /* in the cost case, return a default ColInfo if it doesn't exist */ - ColInfo colInfo(size_t col) const { - return col < colinfo_.size() ? colinfo_.at(col) : ColInfo{}; - } - - Row& newRow() { - rows_.push_back(Row{*this}); - return rows_.back(); - } - - /** return the current representation of the table as a string */ - std::string str() const { - - // calculate max row sizes - std::vector max_sizes; - for (const auto& r : rows_) { - std::vector sizes = r.getSizes(); - for (size_t c = 0; c < sizes.size(); c++) { - size_t row_size = sizes[c]; - if (c >= max_sizes.size()) { - assert(max_sizes.size() == c); - max_sizes.push_back(row_size); - } else { - max_sizes[c] = std::max(max_sizes[c], row_size); - } - } - } - - std::stringstream ss; - for (const auto& r : rows_) { - r.str(ss, max_sizes); - ss << "\n"; - } - - return ss.str(); - } - - void setColColumnSeparator(std::string s) { - sep = s; - } - -}; - -inline void Row::str(std::ostream& os, const std::vector sizes) const -{ - bool first = true; - for (size_t c = 0; c < elems_.size(); c++) { - const auto& e = elems_[c]; - assert(c < sizes.size()); - if (!first) os << table_->sep; // inter-cell padding - first = false; - os << justify(table_->colInfo(c), e, sizes[c]); - } -} - -} - - -#endif /* TABLE_HPP_ */ diff --git a/tools/avx-turbo/test/catch.hpp 
b/tools/avx-turbo/test/catch.hpp deleted file mode 100644 index 7e706f9..0000000 --- a/tools/avx-turbo/test/catch.hpp +++ /dev/null @@ -1,17959 +0,0 @@ -/* - * Catch v2.13.7 - * Generated: 2021-07-28 20:29:27.753164 - * ---------------------------------------------------------- - * This file has been merged from multiple headers. Please don't edit it directly - * Copyright (c) 2021 Two Blue Cubes Ltd. All rights reserved. - * - * Distributed under the Boost Software License, Version 1.0. (See accompanying - * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - */ -#ifndef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED -#define TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED -// start catch.hpp - - -#define CATCH_VERSION_MAJOR 2 -#define CATCH_VERSION_MINOR 13 -#define CATCH_VERSION_PATCH 7 - -#ifdef __clang__ -# pragma clang system_header -#elif defined __GNUC__ -# pragma GCC system_header -#endif - -// start catch_suppress_warnings.h - -#ifdef __clang__ -# ifdef __ICC // icpc defines the __clang__ macro -# pragma warning(push) -# pragma warning(disable: 161 1682) -# else // __ICC -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wpadded" -# pragma clang diagnostic ignored "-Wswitch-enum" -# pragma clang diagnostic ignored "-Wcovered-switch-default" -# endif -#elif defined __GNUC__ - // Because REQUIREs trigger GCC's -Wparentheses, and because still - // supported version of g++ have only buggy support for _Pragmas, - // Wparentheses have to be suppressed globally. 
-# pragma GCC diagnostic ignored "-Wparentheses" // See #674 for details - -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-variable" -# pragma GCC diagnostic ignored "-Wpadded" -#endif -// end catch_suppress_warnings.h -#if defined(CATCH_CONFIG_MAIN) || defined(CATCH_CONFIG_RUNNER) -# define CATCH_IMPL -# define CATCH_CONFIG_ALL_PARTS -#endif - -// In the impl file, we want to have access to all parts of the headers -// Can also be used to sanely support PCHs -#if defined(CATCH_CONFIG_ALL_PARTS) -# define CATCH_CONFIG_EXTERNAL_INTERFACES -# if defined(CATCH_CONFIG_DISABLE_MATCHERS) -# undef CATCH_CONFIG_DISABLE_MATCHERS -# endif -# if !defined(CATCH_CONFIG_ENABLE_CHRONO_STRINGMAKER) -# define CATCH_CONFIG_ENABLE_CHRONO_STRINGMAKER -# endif -#endif - -#if !defined(CATCH_CONFIG_IMPL_ONLY) -// start catch_platform.h - -// See e.g.: -// https://opensource.apple.com/source/CarbonHeaders/CarbonHeaders-18.1/TargetConditionals.h.auto.html -#ifdef __APPLE__ -# include -# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) || \ - (defined(TARGET_OS_MAC) && TARGET_OS_MAC == 1) -# define CATCH_PLATFORM_MAC -# elif (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE == 1) -# define CATCH_PLATFORM_IPHONE -# endif - -#elif defined(linux) || defined(__linux) || defined(__linux__) -# define CATCH_PLATFORM_LINUX - -#elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) || defined(__MINGW32__) -# define CATCH_PLATFORM_WINDOWS -#endif - -// end catch_platform.h - -#ifdef CATCH_IMPL -# ifndef CLARA_CONFIG_MAIN -# define CLARA_CONFIG_MAIN_NOT_DEFINED -# define CLARA_CONFIG_MAIN -# endif -#endif - -// start catch_user_interfaces.h - -namespace Catch { - unsigned int rngSeed(); -} - -// end catch_user_interfaces.h -// start catch_tag_alias_autoregistrar.h - -// start catch_common.h - -// start catch_compiler_capabilities.h - -// Detect a number of compiler features - by compiler -// The following features are defined: -// -// 
CATCH_CONFIG_COUNTER : is the __COUNTER__ macro supported? -// CATCH_CONFIG_WINDOWS_SEH : is Windows SEH supported? -// CATCH_CONFIG_POSIX_SIGNALS : are POSIX signals supported? -// CATCH_CONFIG_DISABLE_EXCEPTIONS : Are exceptions enabled? -// **************** -// Note to maintainers: if new toggles are added please document them -// in configuration.md, too -// **************** - -// In general each macro has a _NO_ form -// (e.g. CATCH_CONFIG_NO_POSIX_SIGNALS) which disables the feature. -// Many features, at point of detection, define an _INTERNAL_ macro, so they -// can be combined, en-mass, with the _NO_ forms later. - -#ifdef __cplusplus - -# if (__cplusplus >= 201402L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) -# define CATCH_CPP14_OR_GREATER -# endif - -# if (__cplusplus >= 201703L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) -# define CATCH_CPP17_OR_GREATER -# endif - -#endif - -// Only GCC compiler should be used in this block, so other compilers trying to -// mask themselves as GCC should be ignored. -#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && !defined(__CUDACC__) && !defined(__LCC__) -# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic push" ) -# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "GCC diagnostic pop" ) - -# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) (void)__builtin_constant_p(__VA_ARGS__) - -#endif - -#if defined(__clang__) - -# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic push" ) -# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION _Pragma( "clang diagnostic pop" ) - -// As of this writing, IBM XL's implementation of __builtin_constant_p has a bug -// which results in calls to destructors being emitted for each temporary, -// without a matching initialization. In practice, this can result in something -// like `std::string::~string` being called on an uninitialized value. 
-// -// For example, this code will likely segfault under IBM XL: -// ``` -// REQUIRE(std::string("12") + "34" == "1234") -// ``` -// -// Therefore, `CATCH_INTERNAL_IGNORE_BUT_WARN` is not implemented. -# if !defined(__ibmxl__) && !defined(__CUDACC__) -# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) (void)__builtin_constant_p(__VA_ARGS__) /* NOLINT(cppcoreguidelines-pro-type-vararg, hicpp-vararg) */ -# endif - -# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ - _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) \ - _Pragma( "clang diagnostic ignored \"-Wglobal-constructors\"") - -# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ - _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) - -# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS \ - _Pragma( "clang diagnostic ignored \"-Wunused-variable\"" ) - -# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS \ - _Pragma( "clang diagnostic ignored \"-Wgnu-zero-variadic-macro-arguments\"" ) - -# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS \ - _Pragma( "clang diagnostic ignored \"-Wunused-template\"" ) - -#endif // __clang__ - -//////////////////////////////////////////////////////////////////////////////// -// Assume that non-Windows platforms support posix signals by default -#if !defined(CATCH_PLATFORM_WINDOWS) - #define CATCH_INTERNAL_CONFIG_POSIX_SIGNALS -#endif - -//////////////////////////////////////////////////////////////////////////////// -// We know some environments not to support full POSIX signals -#if defined(__CYGWIN__) || defined(__QNX__) || defined(__EMSCRIPTEN__) || defined(__DJGPP__) - #define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS -#endif - -#ifdef __OS400__ -# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS -# define CATCH_CONFIG_COLOUR_NONE -#endif - -//////////////////////////////////////////////////////////////////////////////// -// Android somehow still does not support std::to_string -#if defined(__ANDROID__) -# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING 
-# define CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE -#endif - -//////////////////////////////////////////////////////////////////////////////// -// Not all Windows environments support SEH properly -#if defined(__MINGW32__) -# define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH -#endif - -//////////////////////////////////////////////////////////////////////////////// -// PS4 -#if defined(__ORBIS__) -# define CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE -#endif - -//////////////////////////////////////////////////////////////////////////////// -// Cygwin -#ifdef __CYGWIN__ - -// Required for some versions of Cygwin to declare gettimeofday -// see: http://stackoverflow.com/questions/36901803/gettimeofday-not-declared-in-this-scope-cygwin -# define _BSD_SOURCE -// some versions of cygwin (most) do not support std::to_string. Use the libstd check. -// https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/api/a01053_source.html line 2812-2813 -# if !((__cplusplus >= 201103L) && defined(_GLIBCXX_USE_C99) \ - && !defined(_GLIBCXX_HAVE_BROKEN_VSWPRINTF)) - -# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING - -# endif -#endif // __CYGWIN__ - -//////////////////////////////////////////////////////////////////////////////// -// Visual C++ -#if defined(_MSC_VER) - -# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION __pragma( warning(push) ) -# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION __pragma( warning(pop) ) - -// Universal Windows platform does not support SEH -// Or console colours (or console at all...) 
-# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) -# define CATCH_CONFIG_COLOUR_NONE -# else -# define CATCH_INTERNAL_CONFIG_WINDOWS_SEH -# endif - -// MSVC traditional preprocessor needs some workaround for __VA_ARGS__ -// _MSVC_TRADITIONAL == 0 means new conformant preprocessor -// _MSVC_TRADITIONAL == 1 means old traditional non-conformant preprocessor -# if !defined(__clang__) // Handle Clang masquerading for msvc -# if !defined(_MSVC_TRADITIONAL) || (defined(_MSVC_TRADITIONAL) && _MSVC_TRADITIONAL) -# define CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -# endif // MSVC_TRADITIONAL -# endif // __clang__ - -#endif // _MSC_VER - -#if defined(_REENTRANT) || defined(_MSC_VER) -// Enable async processing, as -pthread is specified or no additional linking is required -# define CATCH_INTERNAL_CONFIG_USE_ASYNC -#endif // _MSC_VER - -//////////////////////////////////////////////////////////////////////////////// -// Check if we are compiled with -fno-exceptions or equivalent -#if defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND) -# define CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED -#endif - -//////////////////////////////////////////////////////////////////////////////// -// DJGPP -#ifdef __DJGPP__ -# define CATCH_INTERNAL_CONFIG_NO_WCHAR -#endif // __DJGPP__ - -//////////////////////////////////////////////////////////////////////////////// -// Embarcadero C++Build -#if defined(__BORLANDC__) - #define CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN -#endif - -//////////////////////////////////////////////////////////////////////////////// - -// Use of __COUNTER__ is suppressed during code analysis in -// CLion/AppCode 2017.2.x and former, because __COUNTER__ is not properly -// handled by it. 
-// Otherwise all supported compilers support COUNTER macro, -// but user still might want to turn it off -#if ( !defined(__JETBRAINS_IDE__) || __JETBRAINS_IDE__ >= 20170300L ) - #define CATCH_INTERNAL_CONFIG_COUNTER -#endif - -//////////////////////////////////////////////////////////////////////////////// - -// RTX is a special version of Windows that is real time. -// This means that it is detected as Windows, but does not provide -// the same set of capabilities as real Windows does. -#if defined(UNDER_RTSS) || defined(RTX64_BUILD) - #define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH - #define CATCH_INTERNAL_CONFIG_NO_ASYNC - #define CATCH_CONFIG_COLOUR_NONE -#endif - -#if !defined(_GLIBCXX_USE_C99_MATH_TR1) -#define CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER -#endif - -// Various stdlib support checks that require __has_include -#if defined(__has_include) - // Check if string_view is available and usable - #if __has_include() && defined(CATCH_CPP17_OR_GREATER) - # define CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW - #endif - - // Check if optional is available and usable - # if __has_include() && defined(CATCH_CPP17_OR_GREATER) - # define CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL - # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) - - // Check if byte is available and usable - # if __has_include() && defined(CATCH_CPP17_OR_GREATER) - # include - # if defined(__cpp_lib_byte) && (__cpp_lib_byte > 0) - # define CATCH_INTERNAL_CONFIG_CPP17_BYTE - # endif - # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) - - // Check if variant is available and usable - # if __has_include() && defined(CATCH_CPP17_OR_GREATER) - # if defined(__clang__) && (__clang_major__ < 8) - // work around clang bug with libstdc++ https://bugs.llvm.org/show_bug.cgi?id=31852 - // fix should be in clang 8, workaround in libstdc++ 8.2 - # include - # if defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) - # define CATCH_CONFIG_NO_CPP17_VARIANT - # else - # define 
CATCH_INTERNAL_CONFIG_CPP17_VARIANT - # endif // defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) - # else - # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT - # endif // defined(__clang__) && (__clang_major__ < 8) - # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) -#endif // defined(__has_include) - -#if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) -# define CATCH_CONFIG_COUNTER -#endif -#if defined(CATCH_INTERNAL_CONFIG_WINDOWS_SEH) && !defined(CATCH_CONFIG_NO_WINDOWS_SEH) && !defined(CATCH_CONFIG_WINDOWS_SEH) && !defined(CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH) -# define CATCH_CONFIG_WINDOWS_SEH -#endif -// This is set by default, because we assume that unix compilers are posix-signal-compatible by default. -#if defined(CATCH_INTERNAL_CONFIG_POSIX_SIGNALS) && !defined(CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_POSIX_SIGNALS) -# define CATCH_CONFIG_POSIX_SIGNALS -#endif -// This is set by default, because we assume that compilers with no wchar_t support are just rare exceptions. 
-#if !defined(CATCH_INTERNAL_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_WCHAR) -# define CATCH_CONFIG_WCHAR -#endif - -#if !defined(CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_CPP11_TO_STRING) -# define CATCH_CONFIG_CPP11_TO_STRING -#endif - -#if defined(CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_NO_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_CPP17_OPTIONAL) -# define CATCH_CONFIG_CPP17_OPTIONAL -#endif - -#if defined(CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_NO_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_CPP17_STRING_VIEW) -# define CATCH_CONFIG_CPP17_STRING_VIEW -#endif - -#if defined(CATCH_INTERNAL_CONFIG_CPP17_VARIANT) && !defined(CATCH_CONFIG_NO_CPP17_VARIANT) && !defined(CATCH_CONFIG_CPP17_VARIANT) -# define CATCH_CONFIG_CPP17_VARIANT -#endif - -#if defined(CATCH_INTERNAL_CONFIG_CPP17_BYTE) && !defined(CATCH_CONFIG_NO_CPP17_BYTE) && !defined(CATCH_CONFIG_CPP17_BYTE) -# define CATCH_CONFIG_CPP17_BYTE -#endif - -#if defined(CATCH_CONFIG_EXPERIMENTAL_REDIRECT) -# define CATCH_INTERNAL_CONFIG_NEW_CAPTURE -#endif - -#if defined(CATCH_INTERNAL_CONFIG_NEW_CAPTURE) && !defined(CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NEW_CAPTURE) -# define CATCH_CONFIG_NEW_CAPTURE -#endif - -#if !defined(CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED) && !defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) -# define CATCH_CONFIG_DISABLE_EXCEPTIONS -#endif - -#if defined(CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_NO_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_POLYFILL_ISNAN) -# define CATCH_CONFIG_POLYFILL_ISNAN -#endif - -#if defined(CATCH_INTERNAL_CONFIG_USE_ASYNC) && !defined(CATCH_INTERNAL_CONFIG_NO_ASYNC) && !defined(CATCH_CONFIG_NO_USE_ASYNC) && !defined(CATCH_CONFIG_USE_ASYNC) -# define CATCH_CONFIG_USE_ASYNC -#endif - -#if defined(CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE) && 
!defined(CATCH_CONFIG_NO_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_ANDROID_LOGWRITE) -# define CATCH_CONFIG_ANDROID_LOGWRITE -#endif - -#if defined(CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_NO_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_GLOBAL_NEXTAFTER) -# define CATCH_CONFIG_GLOBAL_NEXTAFTER -#endif - -// Even if we do not think the compiler has that warning, we still have -// to provide a macro that can be used by the code. -#if !defined(CATCH_INTERNAL_START_WARNINGS_SUPPRESSION) -# define CATCH_INTERNAL_START_WARNINGS_SUPPRESSION -#endif -#if !defined(CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION) -# define CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION -#endif -#if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS -#endif -#if !defined(CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS -#endif -#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS -#endif -#if !defined(CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS -#endif - -// The goal of this macro is to avoid evaluation of the arguments, but -// still have the compiler warn on problems inside... -#if !defined(CATCH_INTERNAL_IGNORE_BUT_WARN) -# define CATCH_INTERNAL_IGNORE_BUT_WARN(...) 
-#endif - -#if defined(__APPLE__) && defined(__apple_build_version__) && (__clang_major__ < 10) -# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS -#elif defined(__clang__) && (__clang_major__ < 5) -# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS -#endif - -#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS -#endif - -#if defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) -#define CATCH_TRY if ((true)) -#define CATCH_CATCH_ALL if ((false)) -#define CATCH_CATCH_ANON(type) if ((false)) -#else -#define CATCH_TRY try -#define CATCH_CATCH_ALL catch (...) -#define CATCH_CATCH_ANON(type) catch (type) -#endif - -#if defined(CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_NO_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR) -#define CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -#endif - -// end catch_compiler_capabilities.h -#define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line -#define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) -#ifdef CATCH_CONFIG_COUNTER -# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __COUNTER__ ) -#else -# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ ) -#endif - -#include -#include -#include - -// We need a dummy global operator<< so we can bring it into Catch namespace later -struct Catch_global_namespace_dummy {}; -std::ostream& operator<<(std::ostream&, Catch_global_namespace_dummy); - -namespace Catch { - - struct CaseSensitive { enum Choice { - Yes, - No - }; }; - - class NonCopyable { - NonCopyable( NonCopyable const& ) = delete; - NonCopyable( NonCopyable && ) = delete; - NonCopyable& operator = ( NonCopyable const& ) = delete; - NonCopyable& operator = ( NonCopyable && ) = delete; - - protected: - NonCopyable(); - virtual ~NonCopyable(); - }; - - struct 
SourceLineInfo { - - SourceLineInfo() = delete; - SourceLineInfo( char const* _file, std::size_t _line ) noexcept - : file( _file ), - line( _line ) - {} - - SourceLineInfo( SourceLineInfo const& other ) = default; - SourceLineInfo& operator = ( SourceLineInfo const& ) = default; - SourceLineInfo( SourceLineInfo&& ) noexcept = default; - SourceLineInfo& operator = ( SourceLineInfo&& ) noexcept = default; - - bool empty() const noexcept { return file[0] == '\0'; } - bool operator == ( SourceLineInfo const& other ) const noexcept; - bool operator < ( SourceLineInfo const& other ) const noexcept; - - char const* file; - std::size_t line; - }; - - std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ); - - // Bring in operator<< from global namespace into Catch namespace - // This is necessary because the overload of operator<< above makes - // lookup stop at namespace Catch - using ::operator<<; - - // Use this in variadic streaming macros to allow - // >> +StreamEndStop - // as well as - // >> stuff +StreamEndStop - struct StreamEndStop { - std::string operator+() const; - }; - template - T const& operator + ( T const& value, StreamEndStop ) { - return value; - } -} - -#define CATCH_INTERNAL_LINEINFO \ - ::Catch::SourceLineInfo( __FILE__, static_cast( __LINE__ ) ) - -// end catch_common.h -namespace Catch { - - struct RegistrarForTagAliases { - RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ); - }; - -} // end namespace Catch - -#define CATCH_REGISTER_TAG_ALIAS( alias, spec ) \ - CATCH_INTERNAL_START_WARNINGS_SUPPRESSION \ - CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ - namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } \ - CATCH_INTERNAL_STOP_WARNINGS_SUPPRESSION - -// end catch_tag_alias_autoregistrar.h -// start catch_test_registry.h - -// start catch_interfaces_testcase.h - -#include - -namespace Catch { - - class 
TestSpec; - - struct ITestInvoker { - virtual void invoke () const = 0; - virtual ~ITestInvoker(); - }; - - class TestCase; - struct IConfig; - - struct ITestCaseRegistry { - virtual ~ITestCaseRegistry(); - virtual std::vector const& getAllTests() const = 0; - virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; - }; - - bool isThrowSafe( TestCase const& testCase, IConfig const& config ); - bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); - std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); - std::vector const& getAllTestCasesSorted( IConfig const& config ); - -} - -// end catch_interfaces_testcase.h -// start catch_stringref.h - -#include -#include -#include -#include - -namespace Catch { - - /// A non-owning string class (similar to the forthcoming std::string_view) - /// Note that, because a StringRef may be a substring of another string, - /// it may not be null terminated. 
- class StringRef { - public: - using size_type = std::size_t; - using const_iterator = const char*; - - private: - static constexpr char const* const s_empty = ""; - - char const* m_start = s_empty; - size_type m_size = 0; - - public: // construction - constexpr StringRef() noexcept = default; - - StringRef( char const* rawChars ) noexcept; - - constexpr StringRef( char const* rawChars, size_type size ) noexcept - : m_start( rawChars ), - m_size( size ) - {} - - StringRef( std::string const& stdString ) noexcept - : m_start( stdString.c_str() ), - m_size( stdString.size() ) - {} - - explicit operator std::string() const { - return std::string(m_start, m_size); - } - - public: // operators - auto operator == ( StringRef const& other ) const noexcept -> bool; - auto operator != (StringRef const& other) const noexcept -> bool { - return !(*this == other); - } - - auto operator[] ( size_type index ) const noexcept -> char { - assert(index < m_size); - return m_start[index]; - } - - public: // named queries - constexpr auto empty() const noexcept -> bool { - return m_size == 0; - } - constexpr auto size() const noexcept -> size_type { - return m_size; - } - - // Returns the current start pointer. If the StringRef is not - // null-terminated, throws std::domain_exception - auto c_str() const -> char const*; - - public: // substrings and searches - // Returns a substring of [start, start + length). - // If start + length > size(), then the substring is [start, size()). - // If start > size(), then the substring is empty. - auto substr( size_type start, size_type length ) const noexcept -> StringRef; - - // Returns the current start pointer. May not be null-terminated. 
- auto data() const noexcept -> char const*; - - constexpr auto isNullTerminated() const noexcept -> bool { - return m_start[m_size] == '\0'; - } - - public: // iterators - constexpr const_iterator begin() const { return m_start; } - constexpr const_iterator end() const { return m_start + m_size; } - }; - - auto operator += ( std::string& lhs, StringRef const& sr ) -> std::string&; - auto operator << ( std::ostream& os, StringRef const& sr ) -> std::ostream&; - - constexpr auto operator "" _sr( char const* rawChars, std::size_t size ) noexcept -> StringRef { - return StringRef( rawChars, size ); - } -} // namespace Catch - -constexpr auto operator "" _catch_sr( char const* rawChars, std::size_t size ) noexcept -> Catch::StringRef { - return Catch::StringRef( rawChars, size ); -} - -// end catch_stringref.h -// start catch_preprocessor.hpp - - -#define CATCH_RECURSION_LEVEL0(...) __VA_ARGS__ -#define CATCH_RECURSION_LEVEL1(...) CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(__VA_ARGS__))) -#define CATCH_RECURSION_LEVEL2(...) CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(__VA_ARGS__))) -#define CATCH_RECURSION_LEVEL3(...) CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(__VA_ARGS__))) -#define CATCH_RECURSION_LEVEL4(...) CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(__VA_ARGS__))) -#define CATCH_RECURSION_LEVEL5(...) CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(__VA_ARGS__))) - -#ifdef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -#define INTERNAL_CATCH_EXPAND_VARGS(...) __VA_ARGS__ -// MSVC needs more evaluations -#define CATCH_RECURSION_LEVEL6(...) CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(__VA_ARGS__))) -#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL6(CATCH_RECURSION_LEVEL6(__VA_ARGS__)) -#else -#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL5(__VA_ARGS__) -#endif - -#define CATCH_REC_END(...) 
-#define CATCH_REC_OUT - -#define CATCH_EMPTY() -#define CATCH_DEFER(id) id CATCH_EMPTY() - -#define CATCH_REC_GET_END2() 0, CATCH_REC_END -#define CATCH_REC_GET_END1(...) CATCH_REC_GET_END2 -#define CATCH_REC_GET_END(...) CATCH_REC_GET_END1 -#define CATCH_REC_NEXT0(test, next, ...) next CATCH_REC_OUT -#define CATCH_REC_NEXT1(test, next) CATCH_DEFER ( CATCH_REC_NEXT0 ) ( test, next, 0) -#define CATCH_REC_NEXT(test, next) CATCH_REC_NEXT1(CATCH_REC_GET_END test, next) - -#define CATCH_REC_LIST0(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) -#define CATCH_REC_LIST1(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0) ) ( f, peek, __VA_ARGS__ ) -#define CATCH_REC_LIST2(f, x, peek, ...) f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) - -#define CATCH_REC_LIST0_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) -#define CATCH_REC_LIST1_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0_UD) ) ( f, userdata, peek, __VA_ARGS__ ) -#define CATCH_REC_LIST2_UD(f, userdata, x, peek, ...) f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) - -// Applies the function macro `f` to each of the remaining parameters, inserts commas between the results, -// and passes userdata as the first parameter to each invocation, -// e.g. CATCH_REC_LIST_UD(f, x, a, b, c) evaluates to f(x, a), f(x, b), f(x, c) -#define CATCH_REC_LIST_UD(f, userdata, ...) CATCH_RECURSE(CATCH_REC_LIST2_UD(f, userdata, __VA_ARGS__, ()()(), ()()(), ()()(), 0)) - -#define CATCH_REC_LIST(f, ...) CATCH_RECURSE(CATCH_REC_LIST2(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0)) - -#define INTERNAL_CATCH_EXPAND1(param) INTERNAL_CATCH_EXPAND2(param) -#define INTERNAL_CATCH_EXPAND2(...) 
INTERNAL_CATCH_NO## __VA_ARGS__ -#define INTERNAL_CATCH_DEF(...) INTERNAL_CATCH_DEF __VA_ARGS__ -#define INTERNAL_CATCH_NOINTERNAL_CATCH_DEF -#define INTERNAL_CATCH_STRINGIZE(...) INTERNAL_CATCH_STRINGIZE2(__VA_ARGS__) -#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -#define INTERNAL_CATCH_STRINGIZE2(...) #__VA_ARGS__ -#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) -#else -// MSVC is adding extra space and needs another indirection to expand INTERNAL_CATCH_NOINTERNAL_CATCH_DEF -#define INTERNAL_CATCH_STRINGIZE2(...) INTERNAL_CATCH_STRINGIZE3(__VA_ARGS__) -#define INTERNAL_CATCH_STRINGIZE3(...) #__VA_ARGS__ -#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) (INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) + 1) -#endif - -#define INTERNAL_CATCH_MAKE_NAMESPACE2(...) ns_##__VA_ARGS__ -#define INTERNAL_CATCH_MAKE_NAMESPACE(name) INTERNAL_CATCH_MAKE_NAMESPACE2(name) - -#define INTERNAL_CATCH_REMOVE_PARENS(...) INTERNAL_CATCH_EXPAND1(INTERNAL_CATCH_DEF __VA_ARGS__) - -#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR -#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) decltype(get_wrapper()) -#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__)) -#else -#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) INTERNAL_CATCH_EXPAND_VARGS(decltype(get_wrapper())) -#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) 
INTERNAL_CATCH_EXPAND_VARGS(INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__))) -#endif - -#define INTERNAL_CATCH_MAKE_TYPE_LISTS_FROM_TYPES(...)\ - CATCH_REC_LIST(INTERNAL_CATCH_MAKE_TYPE_LIST,__VA_ARGS__) - -#define INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_0) INTERNAL_CATCH_REMOVE_PARENS(_0) -#define INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_0, _1) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_1) -#define INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_0, _1, _2) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_1, _2) -#define INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_0, _1, _2, _3) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_1, _2, _3) -#define INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_0, _1, _2, _3, _4) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_1, _2, _3, _4) -#define INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_0, _1, _2, _3, _4, _5) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_1, _2, _3, _4, _5) -#define INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_0, _1, _2, _3, _4, _5, _6) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_1, _2, _3, _4, _5, _6) -#define INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_0, _1, _2, _3, _4, _5, _6, _7) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_1, _2, _3, _4, _5, _6, _7) -#define INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_1, _2, _3, _4, _5, _6, _7, _8) -#define INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9) -#define INTERNAL_CATCH_REMOVE_PARENS_11_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10) - -#define INTERNAL_CATCH_VA_NARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, 
_8, _9, _10, N, ...) N - -#define INTERNAL_CATCH_TYPE_GEN\ - template struct TypeList {};\ - template\ - constexpr auto get_wrapper() noexcept -> TypeList { return {}; }\ - template class...> struct TemplateTypeList{};\ - template class...Cs>\ - constexpr auto get_wrapper() noexcept -> TemplateTypeList { return {}; }\ - template\ - struct append;\ - template\ - struct rewrap;\ - template class, typename...>\ - struct create;\ - template class, typename>\ - struct convert;\ - \ - template \ - struct append { using type = T; };\ - template< template class L1, typename...E1, template class L2, typename...E2, typename...Rest>\ - struct append, L2, Rest...> { using type = typename append, Rest...>::type; };\ - template< template class L1, typename...E1, typename...Rest>\ - struct append, TypeList, Rest...> { using type = L1; };\ - \ - template< template class Container, template class List, typename...elems>\ - struct rewrap, List> { using type = TypeList>; };\ - template< template class Container, template class List, class...Elems, typename...Elements>\ - struct rewrap, List, Elements...> { using type = typename append>, typename rewrap, Elements...>::type>::type; };\ - \ - template