diff --git a/evergreen.yml b/evergreen.yml
index 00a855441d..8a829badac 100644
--- a/evergreen.yml
+++ b/evergreen.yml
@@ -151,6 +151,10 @@ tasks:
   commands:
     - func: f_lint_workloads
 
+- name: t_lint_python
+  commands:
+    - func: f_lint_python
+
 - name: t_cmake_test
   commands:
     - func: f_cmake_test
@@ -219,6 +223,7 @@ task_groups:
       # run after t_compile.
       - t_compile
       - t_python_test
+      - t_lint_python
       - t_lint_workloads
       - t_cmake_test
 
@@ -234,6 +239,7 @@ task_groups:
       - t_compile
       - t_mongo_server
       - t_python_test
+      - t_lint_python
      - t_cmake_test
       - t_integration_test_standalone
       - t_integration_test_single_node_replset
@@ -503,6 +509,31 @@ functions:
         lint-workloads
 
+  ##
+  # Lint python
+  ##
+  f_lint_python:
+    - command: shell.exec
+      params:
+        continue_on_err: true
+        working_dir: src
+        shell: bash
+        script: |
+          set -eo pipefail
+
+          export PATH=/opt/mongodbtoolchain/v3/bin:$PATH
+
+          python3 -m virtualenv venv
+          source venv/bin/activate
+
+          # Install Genny python scripts into the virtualenv
+          pushd src/python
+          python3 setup.py install
+          popd
+
+          black --check src/python src/lamplib
+
+
 ##
 # Reports test results to evergreen API.
 #
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000..97f50372bb
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.black]
+line-length = 100
+target-version = ['py37']
diff --git a/src/lamplib/__main__.py b/src/lamplib/__main__.py
index b0b5734622..7828a1d718 100644
--- a/src/lamplib/__main__.py
+++ b/src/lamplib/__main__.py
@@ -11,39 +11,49 @@
 from context import Context
 from parser import add_args_to_context
 
+
 def check_venv(args):
-    if not 'VIRTUAL_ENV' in os.environ and not args.run_global:
-        logging.error('Tried to execute without active virtualenv. If you want to run lamp '
-                      'without a virtualenv, use the --run-global option.')
+    if not "VIRTUAL_ENV" in os.environ and not args.run_global:
+        logging.error(
+            "Tried to execute without active virtualenv. If you want to run lamp "
+            "without a virtualenv, use the --run-global option."
+        )
         sys.exit(1)
 
+
 def run_self_test():
-    res = subprocess.run(['python3', '-m', 'unittest'],
-                         cwd=os.path.dirname(os.path.abspath(__file__)))
+    res = subprocess.run(
+        ["python3", "-m", "unittest"], cwd=os.path.dirname(os.path.abspath(__file__))
+    )
     res.check_returncode()
     sys.exit(0)
 
+
 def python_version_string():
-    return '.'.join(map(str, sys.version_info))[0:5]
+    return ".".join(map(str, sys.version_info))[0:5]
 
+
 def validate_environment():
     # Check Python version
     if not sys.version_info >= (3, 7):
-        raise OSError('Detected Python version {version} less than 3.7. Please delete '
-                      'the virtualenv and run lamp again.'.format(version=python_version_string()))
+        raise OSError(
+            "Detected Python version {version} less than 3.7. Please delete "
+            "the virtualenv and run lamp again.".format(version=python_version_string())
+        )
 
     # Check the macOS version. Non-mac platforms return a tuple of empty strings
     # for platform.mac_ver().
     if platform.mac_ver()[0]:
-        release_triplet = platform.mac_ver()[0].split('.')
+        release_triplet = platform.mac_ver()[0].split(".")
         if int(release_triplet[1]) < 14:
             # You could technically compile clang or gcc yourself on an older version
             # of macOS, but it's untested so we might as well just enforce
             # a blanket minimum macOS version for simplicity.
-            logging.error('Genny requires macOS 10.14 Mojave or newer')
+            logging.error("Genny requires macOS 10.14 Mojave or newer")
             sys.exit(1)
 
     return
 
+
 def main():
     validate_environment()
 
@@ -55,11 +65,11 @@ def main():
     # Pass around Context instead of using the global one to facilitate testing.
     context = Context
 
-    check_venv(args)
+    check_venv(args)
 
     # Execute the minimum amount of code possible to run self tests to minimize
     # untestable code (i.e. code that runs the self-test).
-    if args.subcommand == 'self-test':
+    if args.subcommand == "self-test":
         run_self_test()
 
     toolchain_downloader = ToolchainDownloader(os_family, args.linux_distro)
@@ -73,29 +83,33 @@ def main():
         sys.exit(1)
     curator_path = curator_downloader.result_dir
 
-
     if not args.subcommand:
-        logging.info('No subcommand specified; running cmake, compile and install')
-        tasks.cmake(context, toolchain_dir=toolchain_dir,
-                    env=compile_env, cmdline_cmake_args=cmake_args)
+        logging.info("No subcommand specified; running cmake, compile and install")
+        tasks.cmake(
+            context, toolchain_dir=toolchain_dir, env=compile_env, cmdline_cmake_args=cmake_args
+        )
         tasks.compile_all(context, compile_env)
         tasks.install(context, compile_env)
-    elif args.subcommand == 'clean':
+    elif args.subcommand == "clean":
         tasks.clean(context, compile_env)
     else:
         tasks.compile_all(context, compile_env)
 
-    if args.subcommand == 'install':
+    if args.subcommand == "install":
         tasks.install(context, compile_env)
-    elif args.subcommand == 'cmake-test':
+    elif args.subcommand == "cmake-test":
         tasks.run_tests.cmake_test(compile_env)
-    elif args.subcommand == 'benchmark-test':
+    elif args.subcommand == "benchmark-test":
         tasks.run_tests.benchmark_test(compile_env)
-    elif args.subcommand == 'resmoke-test':
-        tasks.run_tests.resmoke_test(compile_env, suites=args.resmoke_suites,
-                                     mongo_dir=args.resmoke_mongo_dir, is_cnats=args.resmoke_cnats)
+    elif args.subcommand == "resmoke-test":
+        tasks.run_tests.resmoke_test(
+            compile_env,
+            suites=args.resmoke_suites,
+            mongo_dir=args.resmoke_mongo_dir,
+            is_cnats=args.resmoke_cnats,
+        )
     else:
-        raise ValueError('Unknown subcommand: ', args.subcommand)
+        raise ValueError("Unknown subcommand: ", args.subcommand)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/src/lamplib/context.py b/src/lamplib/context.py
index 8098c606da..9d6713d125 100644
--- a/src/lamplib/context.py
+++ b/src/lamplib/context.py
@@ -2,38 +2,35 @@
 import os
 
 # Map of platform.system() to vcpkg's OS names.
-_triplet_os_map = {
-    'Darwin': 'osx',
-    'Linux': 'linux',
-    'NT': 'windows'
-}
+_triplet_os_map = {"Darwin": "osx", "Linux": "linux", "NT": "windows"}
 
 
 # Define complex operations as private methods on the module to keep the
 # public Context object clean.
 def _create_compile_environment(triplet_os, toolchain_dir):
     env = os.environ.copy()
-    paths = [env['PATH']]
+    paths = [env["PATH"]]
 
     # For mongodbtoolchain compiler (if there).
- paths.insert(0, '/opt/mongodbtoolchain/v3/bin') + paths.insert(0, "/opt/mongodbtoolchain/v3/bin") # For cmake and ctest cmake_bin_relative_dir = { - 'linux': 'downloads/tools/cmake-3.13.3-linux/cmake-3.13.3-Linux-x86_64/bin', - 'osx': 'downloads/tools/cmake-3.13.3-osx/cmake-3.13.3-Darwin-x86_64/CMake.app/Contents/bin' + "linux": "downloads/tools/cmake-3.13.3-linux/cmake-3.13.3-Linux-x86_64/bin", + "osx": "downloads/tools/cmake-3.13.3-osx/cmake-3.13.3-Darwin-x86_64/CMake.app/Contents/bin", }[triplet_os] paths.insert(0, os.path.join(toolchain_dir, cmake_bin_relative_dir)) # For ninja - ninja_bin_dir = os.path.join(toolchain_dir, - 'downloads/tools/ninja-1.8.2-{}:'.format(triplet_os)) + ninja_bin_dir = os.path.join( + toolchain_dir, "downloads/tools/ninja-1.8.2-{}:".format(triplet_os) + ) paths.insert(0, ninja_bin_dir) - env['PATH'] = ':'.join(paths) - env['NINJA_STATUS'] = '[%f/%t (%p) %es] ' # make the ninja output even nicer + env["PATH"] = ":".join(paths) + env["NINJA_STATUS"] = "[%f/%t (%p) %es] " # make the ninja output even nicer - logging.debug('Using environment: %s', env) + logging.debug("Using environment: %s", env) return env @@ -53,10 +50,12 @@ def get_compile_environment(toolchain_dir=None): if not Context._compile_environment: if not toolchain_dir: raise ValueError( - 'toolchain_dir must be specified when getting the compile environment for the ' - 'first time') - Context._compile_environment = _create_compile_environment(Context.TRIPLET_OS, - toolchain_dir) + "toolchain_dir must be specified when getting the compile environment for the " + "first time" + ) + Context._compile_environment = _create_compile_environment( + Context.TRIPLET_OS, toolchain_dir + ) return Context._compile_environment # Helper methods diff --git a/src/lamplib/parser.py b/src/lamplib/parser.py index 64d693eb4e..089f205e21 100644 --- a/src/lamplib/parser.py +++ b/src/lamplib/parser.py @@ -6,56 +6,77 @@ def parse_args(args, os_family): parser = argparse.ArgumentParser( - description='Script for building genny', - epilog='Unknown positional arguments will be forwarded verbatim to the cmake' - ' invocation where relevant' + description="Script for building genny", + epilog="Unknown positional arguments will be forwarded verbatim to the cmake" + " invocation where relevant", ) # Python can't natively check the distros of our supported platforms. # See https://bugs.python.org/issue18872 for more info. - parser.add_argument('-d', '--linux-distro', - choices=['ubuntu1804', 'archlinux', 'rhel8', 'rhel70', 'rhel62', - 'amazon2', 'not-linux'], - help='specify the linux distro you\'re on; if your system isn\'t available,' - ' please contact us at #workload-generation') - parser.add_argument('-v', '--verbose', action='store_true') - parser.add_argument('-g', '--run-global', action='store_true', - help='allow installation outside of a virtualenv') - parser.add_argument('-i', '--ignore-toolchain-version', action='store_true', - help='ignore the toolchain version, useful for testing toolchain changes') - parser.add_argument('-b', '--build-system', - choices=['make', 'ninja'], default='ninja', - help='Which build-system to use for compilation. 
May need to use make for ' - 'IDEs.') - parser.add_argument('-s', '--sanitizer', choices=['asan', 'tsan', 'ubsan']) + parser.add_argument( + "-d", + "--linux-distro", + choices=["ubuntu1804", "archlinux", "rhel8", "rhel70", "rhel62", "amazon2", "not-linux"], + help="specify the linux distro you're on; if your system isn't available," + " please contact us at #workload-generation", + ) + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument( + "-g", "--run-global", action="store_true", help="allow installation outside of a virtualenv" + ) + parser.add_argument( + "-i", + "--ignore-toolchain-version", + action="store_true", + help="ignore the toolchain version, useful for testing toolchain changes", + ) + parser.add_argument( + "-b", + "--build-system", + choices=["make", "ninja"], + default="ninja", + help="Which build-system to use for compilation. May need to use make for " "IDEs.", + ) + parser.add_argument("-s", "--sanitizer", choices=["asan", "tsan", "ubsan"]) subparsers = parser.add_subparsers( - dest='subcommand', - description='subcommands perform specific actions; make sure you run this script without ' - 'any subcommand first to initialize the environment') + dest="subcommand", + description="subcommands perform specific actions; make sure you run this script without " + "any subcommand first to initialize the environment", + ) subparsers.add_parser( - 'cmake-test', help='run cmake unit tests that don\'t connect to a MongoDB cluster') - subparsers.add_parser('benchmark-test', help='run benchmark unit tests') + "cmake-test", help="run cmake unit tests that don't connect to a MongoDB cluster" + ) + subparsers.add_parser("benchmark-test", help="run benchmark unit tests") resmoke_test_parser = subparsers.add_parser( - 'resmoke-test', help='run cmake unit tests that connect to a MongoDB cluster') + "resmoke-test", help="run cmake unit tests that connect to a MongoDB cluster" + ) group = resmoke_test_parser.add_mutually_exclusive_group() - group.add_argument('--suites', dest='resmoke_suites', - help='equivalent to resmoke.py\'s "--suites" option') - group.add_argument('--create-new-actor-test-suite', action='store_true', dest='resmoke_cnats', - help='Run the "genny_create_new_actor" resmoke test suite,' - ' incompatible with the --suites options') - resmoke_test_parser.add_argument('--mongo-dir', dest='resmoke_mongo_dir', - help='path to the mongo repo, which contains buildscripts/resmoke.py') + group.add_argument( + "--suites", dest="resmoke_suites", help='equivalent to resmoke.py\'s "--suites" option' + ) + group.add_argument( + "--create-new-actor-test-suite", + action="store_true", + dest="resmoke_cnats", + help='Run the "genny_create_new_actor" resmoke test suite,' + " incompatible with the --suites options", + ) + resmoke_test_parser.add_argument( + "--mongo-dir", + dest="resmoke_mongo_dir", + help="path to the mongo repo, which contains buildscripts/resmoke.py", + ) - subparsers.add_parser('install', help='just run the install step for genny') - subparsers.add_parser('clean', help='cleanup existing build') - subparsers.add_parser('self-test', help='run lamplib unittests') + subparsers.add_parser("install", help="just run the install step for genny") + subparsers.add_parser("clean", help="cleanup existing build") + subparsers.add_parser("self-test", help="run lamplib unittests") known_args, unknown_args = parser.parse_known_args(args) - if os_family == 'Linux' and not known_args.subcommand and not known_args.linux_distro: - raise 
ValueError('--linux-distro must be specified on Linux') + if os_family == "Linux" and not known_args.subcommand and not known_args.linux_distro: + raise ValueError("--linux-distro must be specified on Linux") return known_args, unknown_args diff --git a/src/lamplib/tasks/__init__.py b/src/lamplib/tasks/__init__.py index b0cb0a998a..b1764612f2 100644 --- a/src/lamplib/tasks/__init__.py +++ b/src/lamplib/tasks/__init__.py @@ -3,70 +3,73 @@ import sys import subprocess + def _sanitizer_flags(context): if context.SANITIZER is None: return [] - if context.SANITIZER == 'asan': - return ['-DCMAKE_CXX_FLAGS=-pthread -fsanitize=address -O1 -fno-omit-frame-pointer -g'] - elif context.SANITIZER == 'tsan': - return ['-DCMAKE_CXX_FLAGS=-pthread -fsanitize=thread -g -O1'] - elif context.SANITIZER == 'ubsan': - return ['-DCMAKE_CXX_FLAGS=-pthread -fsanitize=undefined -g -O1'] + if context.SANITIZER == "asan": + return ["-DCMAKE_CXX_FLAGS=-pthread -fsanitize=address -O1 -fno-omit-frame-pointer -g"] + elif context.SANITIZER == "tsan": + return ["-DCMAKE_CXX_FLAGS=-pthread -fsanitize=thread -g -O1"] + elif context.SANITIZER == "ubsan": + return ["-DCMAKE_CXX_FLAGS=-pthread -fsanitize=undefined -g -O1"] # arg parser should prevent us from getting here - raise ValueError('Unknown sanitizer {}'.format(context.SANITIZER)) + raise ValueError("Unknown sanitizer {}".format(context.SANITIZER)) + def cmake(context, toolchain_dir, env, cmdline_cmake_args): - generators = { - 'make': 'Unix Makefiles', - 'ninja': 'Ninja' - } - cmake_cmd = ['cmake', '-B', 'build', '-G', generators[context.BUILD_SYSTEM]] + generators = {"make": "Unix Makefiles", "ninja": "Ninja"} + cmake_cmd = ["cmake", "-B", "build", "-G", generators[context.BUILD_SYSTEM]] # We set both the prefix path and the toolchain file here as a hack to allow cmake # to find both shared and static libraries. vcpkg doesn't natively support a project # using both. 
- cmake_prefix_path = os.path.join(toolchain_dir, 'installed/x64-{}-shared'.format(context.TRIPLET_OS)) - cmake_toolchain_file = os.path.join(toolchain_dir, 'scripts/buildsystems/vcpkg.cmake') + cmake_prefix_path = os.path.join( + toolchain_dir, "installed/x64-{}-shared".format(context.TRIPLET_OS) + ) + cmake_toolchain_file = os.path.join(toolchain_dir, "scripts/buildsystems/vcpkg.cmake") cmake_cmd += [ - '-DCMAKE_PREFIX_PATH={}'.format(cmake_prefix_path), - '-DCMAKE_TOOLCHAIN_FILE={}'.format(cmake_toolchain_file), - '-DVCPKG_TARGET_TRIPLET=x64-{}-static'.format(context.TRIPLET_OS), + "-DCMAKE_PREFIX_PATH={}".format(cmake_prefix_path), + "-DCMAKE_TOOLCHAIN_FILE={}".format(cmake_toolchain_file), + "-DVCPKG_TARGET_TRIPLET=x64-{}-static".format(context.TRIPLET_OS), ] cmake_cmd += _sanitizer_flags(context) cmake_cmd += cmdline_cmake_args - logging.info('Running cmake: %s', ' '.join(cmake_cmd)) + logging.info("Running cmake: %s", " ".join(cmake_cmd)) subprocess.run(cmake_cmd, env=env) + def compile_all(context, env): - compile_cmd = [context.BUILD_SYSTEM, '-C', 'build'] - logging.info('Compiling: %s', ' '.join(compile_cmd)) + compile_cmd = [context.BUILD_SYSTEM, "-C", "build"] + logging.info("Compiling: %s", " ".join(compile_cmd)) subprocess.run(compile_cmd, env=env) + def install(context, env): - - install_cmd = [context.BUILD_SYSTEM, '-C', 'build', 'install'] - logging.info('Running install: %s', ' '.join(install_cmd)) + + install_cmd = [context.BUILD_SYSTEM, "-C", "build", "install"] + logging.info("Running install: %s", " ".join(install_cmd)) subprocess.run(install_cmd, env=env) # Install python scripts - python_cmd = ['python3', 'setup.py', 'develop'] - subprocess.run(python_cmd, cwd='src/python') + python_cmd = ["python3", "setup.py", "develop"] + subprocess.run(python_cmd, cwd="src/python") def clean(context, env): - clean_cmd = [context.BUILD_SYSTEM, '-C', 'build', 'clean'] - logging.info('Running clean: %s', ' '.join(clean_cmd)) + clean_cmd = [context.BUILD_SYSTEM, "-C", "build", "clean"] + logging.info("Running clean: %s", " ".join(clean_cmd)) subprocess.run(clean_cmd, env=env) # Physically remove all built files. - logging.info('Erasing `build/` and `dist/`') - subprocess.run(['rm', '-rf', 'build'], env=env) - subprocess.run(['rm', '-rf', 'dist'], env=env) + logging.info("Erasing `build/` and `dist/`") + subprocess.run(["rm", "-rf", "build"], env=env) + subprocess.run(["rm", "-rf", "dist"], env=env) # Put back build/.gitinore - subprocess.run(['git', 'checkout', '--', 'build'], env=env) + subprocess.run(["git", "checkout", "--", "build"], env=env) diff --git a/src/lamplib/tasks/download.py b/src/lamplib/tasks/download.py index 278a734b1b..80d4f2a59a 100644 --- a/src/lamplib/tasks/download.py +++ b/src/lamplib/tasks/download.py @@ -7,6 +7,7 @@ from context import Context + class Downloader: """ Abstract base class for things that can be downloaded. 
@@ -37,8 +38,12 @@ def fetch_and_install(self): """ if not os.path.exists(self._install_dir): logging.critical( - 'Please create the parent directory for %s: ' - '`sudo mkdir -p "%s"; sudo chown "$USER" "%s"`', self._name, self._install_dir, self._install_dir) + "Please create the parent directory for %s: " + '`sudo mkdir -p "%s"; sudo chown "$USER" "%s"`', + self._name, + self._install_dir, + self._install_dir, + ) return False if not os.path.isdir(self._install_dir): @@ -47,67 +52,78 @@ def fetch_and_install(self): if not os.access(self._install_dir, os.W_OK): logging.critical( - 'Please ensure you have write access to the parent directory for %s: ' - '`sudo chown $USER %s`', self._name, self._install_dir) + "Please ensure you have write access to the parent directory for %s: " + "`sudo chown $USER %s`", + self._name, + self._install_dir, + ) return False self.result_dir = os.path.join(self._install_dir, self._name) if self._can_ignore(): - logging.info('Skipping installing the %s into: %s', self._name, self.result_dir) + logging.info("Skipping installing the %s into: %s", self._name, self.result_dir) else: - tarball = os.path.join(self._install_dir, self._name + '.tgz') + tarball = os.path.join(self._install_dir, self._name + ".tgz") if os.path.isfile(tarball): - logging.info('Skipping downloading %s', tarball) + logging.info("Skipping downloading %s", tarball) else: - logging.info('Downloading %s, please wait...', self._name) + logging.info("Downloading %s, please wait...", self._name) url = self._get_url() urllib.request.urlretrieve(url, tarball) - logging.info('Finished Downloading %s as %s', self._name, tarball) + logging.info("Finished Downloading %s as %s", self._name, tarball) - logging.info('Extracting %s into %s, please wait...', self._name, self.result_dir) + logging.info("Extracting %s into %s, please wait...", self._name, self.result_dir) shutil.rmtree(self.result_dir, ignore_errors=True) os.mkdir(self.result_dir) # use tar(1) because python's TarFile was inexplicably truncating the tarball - subprocess.run(['tar', '-xzf', tarball, '-C', self.result_dir], check=True) - logging.info('Finished extracting %s into %s', self._name, self.result_dir) + subprocess.run(["tar", "-xzf", tarball, "-C", self.result_dir], check=True) + logging.info("Finished extracting %s into %s", self._name, self.result_dir) # Get space back. os.remove(tarball) return True - def _get_url(self): raise NotImplementedError def _can_ignore(self): raise NotImplementedError + class ToolchainDownloader(Downloader): # These build IDs are from the genny-toolchain Evergreen task. # https://evergreen.mongodb.com/waterfall/genny-toolchain - TOOLCHAIN_BUILD_ID = 'cd5a4031b1dc93e47d598ad41521fd2e8aa865a0_20_02_12_20_07_30' - TOOLCHAIN_GIT_HASH = TOOLCHAIN_BUILD_ID.split('_')[0] - TOOLCHAIN_ROOT = '/data/mci' # TODO BUILD-7624 change this to /opt. + TOOLCHAIN_BUILD_ID = "cd5a4031b1dc93e47d598ad41521fd2e8aa865a0_20_02_12_20_07_30" + TOOLCHAIN_GIT_HASH = TOOLCHAIN_BUILD_ID.split("_")[0] + TOOLCHAIN_ROOT = "/data/mci" # TODO BUILD-7624 change this to /opt. 
def __init__(self, os_family, distro): super().__init__(os_family, distro, ToolchainDownloader.TOOLCHAIN_ROOT, "gennytoolchain") def _get_url(self): - if self._os_family == 'Darwin': - self._distro = 'macos_1014' + if self._os_family == "Darwin": + self._distro = "macos_1014" - return 'https://s3.amazonaws.com/mciuploads/genny-toolchain/' \ - 'genny_toolchain_{}_{}/gennytoolchain.tgz'.format(self._distro, ToolchainDownloader.TOOLCHAIN_BUILD_ID) + return ( + "https://s3.amazonaws.com/mciuploads/genny-toolchain/" + "genny_toolchain_{}_{}/gennytoolchain.tgz".format( + self._distro, ToolchainDownloader.TOOLCHAIN_BUILD_ID + ) + ) def _can_ignore(self): # If the toolchain dir is outdated or we ignore the toolchain version. - return os.path.exists(self.result_dir) and (Context.IGNORE_TOOLCHAIN_VERSION or self._check_toolchain_githash()) + return os.path.exists(self.result_dir) and ( + Context.IGNORE_TOOLCHAIN_VERSION or self._check_toolchain_githash() + ) def _check_toolchain_githash(self): - res = subprocess.run(['git', 'rev-parse', 'HEAD'], cwd=self.result_dir, capture_output=True, text=True) + res = subprocess.run( + ["git", "rev-parse", "HEAD"], cwd=self.result_dir, capture_output=True, text=True + ) return res.stdout.strip() == ToolchainDownloader.TOOLCHAIN_GIT_HASH @@ -117,30 +133,36 @@ class CuratorDownloader(Downloader): # Note that DSI also downloads Curator, the location is specified in defaults.yml. # Please try to keep the two versions consistent. - CURATOR_VERSION = '198c374fe4cb245570858f07c6d4d2e4ae13ede5' + CURATOR_VERSION = "198c374fe4cb245570858f07c6d4d2e4ae13ede5" CURATOR_ROOT = os.getcwd() def __init__(self, os_family, distro): super().__init__(os_family, distro, CuratorDownloader.CURATOR_ROOT, "curator") def _get_url(self): - if self._os_family == 'Darwin': - self._distro = 'macos' + if self._os_family == "Darwin": + self._distro = "macos" - if 'ubuntu' in self._distro: - self._distro = 'ubuntu1604' + if "ubuntu" in self._distro: + self._distro = "ubuntu1604" - if self._distro in ('amazon2', 'rhel8', 'rhel62'): - self._distro = 'rhel70' + if self._distro in ("amazon2", "rhel8", "rhel62"): + self._distro = "rhel70" - return 'https://s3.amazonaws.com/boxes.10gen.com/build/curator/' \ - 'curator-dist-{distro}-{build}.tar.gz'.format(distro=self._distro, build=CuratorDownloader.CURATOR_VERSION) + return ( + "https://s3.amazonaws.com/boxes.10gen.com/build/curator/" + "curator-dist-{distro}-{build}.tar.gz".format( + distro=self._distro, build=CuratorDownloader.CURATOR_VERSION + ) + ) def _can_ignore(self): - return os.path.exists(self.result_dir) and (Context.IGNORE_TOOLCHAIN_VERSION or self._check_curator_version()) + return os.path.exists(self.result_dir) and ( + Context.IGNORE_TOOLCHAIN_VERSION or self._check_curator_version() + ) def _check_curator_version(self): - res = subprocess.run(['./curator', '-v'], cwd=self.result_dir, capture_output=True, text=True) + res = subprocess.run( + ["./curator", "-v"], cwd=self.result_dir, capture_output=True, text=True + ) return res.stdout.split()[2] == CuratorDownloader.CURATOR_VERSION - - diff --git a/src/lamplib/tasks/run_tests.py b/src/lamplib/tasks/run_tests.py index 9497cf3964..a86eb3b9b1 100644 --- a/src/lamplib/tasks/run_tests.py +++ b/src/lamplib/tasks/run_tests.py @@ -22,9 +22,9 @@ def _run_command_with_sentinel_report(cmd_func, checker_func=None): - sentinel_file = os.path.join(os.getcwd(), 'build', 'sentinel.junit.xml') + sentinel_file = os.path.join(os.getcwd(), "build", "sentinel.junit.xml") - with open(sentinel_file, 'w') as 
f: + with open(sentinel_file, "w") as f: f.write(_sentinel_report) res = cmd_func() @@ -32,26 +32,26 @@ def _run_command_with_sentinel_report(cmd_func, checker_func=None): if checker_func: success = checker_func() else: - success = (res.returncode == 0) + success = res.returncode == 0 if success: - logging.debug('Test succeeded, removing sentinel report') + logging.debug("Test succeeded, removing sentinel report") os.remove(sentinel_file) else: - logging.debug('Test failed, leaving sentinel report in place') + logging.debug("Test failed, leaving sentinel report in place") def cmake_test(env): # This can only be imported after the setup script has installed gennylib. from gennylib.genny_runner import poplar_grpc - workdir = os.path.join(os.getcwd(), 'build') + workdir = os.path.join(os.getcwd(), "build") ctest_cmd = [ - 'ctest', - '--verbose', - '--label-exclude', - '(standalone|sharded|single_node_replset|three_node_replset|benchmark)' + "ctest", + "--verbose", + "--label-exclude", + "(standalone|sharded|single_node_replset|three_node_replset|benchmark)", ] with poplar_grpc(): @@ -59,9 +59,9 @@ def cmake_test(env): def benchmark_test(env): - workdir = os.path.join(os.getcwd(), 'build') + workdir = os.path.join(os.getcwd(), "build") - ctest_cmd = ['ctest', '--label-regex', '(benchmark)'] + ctest_cmd = ["ctest", "--label-regex", "(benchmark)"] _run_command_with_sentinel_report(lambda: subprocess.run(ctest_cmd, cwd=workdir, env=env)) @@ -69,13 +69,13 @@ def benchmark_test(env): def _check_create_new_actor_test_report(workdir): passed = False - report_file = os.path.join(workdir, 'build', 'create_new_actor_test.junit.xml') + report_file = os.path.join(workdir, "build", "create_new_actor_test.junit.xml") if not os.path.isfile(report_file): - logging.error('Failed to find report file: %s', report_file) + logging.error("Failed to find report file: %s", report_file) return passed - expected_error = "failure message=\"100 == 101\"" + expected_error = 'failure message="100 == 101"' with open(report_file) as f: report = f.read() @@ -84,8 +84,11 @@ def _check_create_new_actor_test_report(workdir): if passed: os.remove(report_file) # Remove the report file for the expected failure. else: - logging.error('test for create-new-actor script did not succeed. Failed to find expected ' - 'error message %s in report file', expected_error) + logging.error( + "test for create-new-actor script did not succeed. Failed to find expected " + "error message %s in report file", + expected_error, + ) return passed @@ -95,7 +98,7 @@ def resmoke_test(env, suites, mongo_dir, is_cnats): checker_func = None if is_cnats: - suites = os.path.join(workdir, 'src', 'resmokeconfig', 'genny_create_new_actor.yml') + suites = os.path.join(workdir, "src", "resmokeconfig", "genny_create_new_actor.yml") checker_func = lambda: _check_create_new_actor_test_report(workdir) if (not suites) and (not is_cnats): @@ -103,19 +106,24 @@ def resmoke_test(env, suites, mongo_dir, is_cnats): if not mongo_dir: # Default mongo directory in Evergreen. - mongo_dir = os.path.join(workdir, 'build', 'mongo') + mongo_dir = os.path.join(workdir, "build", "mongo") # Default download location for MongoDB binaries. 
- env['PATH'] += ':' + os.path.join(mongo_dir, 'bin') + ':' + mongo_dir + env["PATH"] += ":" + os.path.join(mongo_dir, "bin") + ":" + mongo_dir - evg_venv_dir = os.path.join(workdir, 'build', 'venv') + evg_venv_dir = os.path.join(workdir, "build", "venv") cmds = [] if os.path.isdir(evg_venv_dir): - cmds.append('source ' + os.path.join(evg_venv_dir, 'bin', 'activate')) + cmds.append("source " + os.path.join(evg_venv_dir, "bin", "activate")) cmds.append( - 'python ' + os.path.join(mongo_dir, 'buildscripts', 'resmoke.py') + ' --suite ' + suites + - ' --mongod mongod --mongo mongo --mongos mongos') + "python " + + os.path.join(mongo_dir, "buildscripts", "resmoke.py") + + " --suite " + + suites + + " --mongod mongod --mongo mongo --mongos mongos" + ) _run_command_with_sentinel_report( - lambda: subprocess.run(';'.join(cmds), cwd=workdir, env=env, shell=True), checker_func) + lambda: subprocess.run(";".join(cmds), cwd=workdir, env=env, shell=True), checker_func + ) diff --git a/src/lamplib/tests/test_run_tests.py b/src/lamplib/tests/test_run_tests.py index ede0263428..b4f97a76fa 100644 --- a/src/lamplib/tests/test_run_tests.py +++ b/src/lamplib/tests/test_run_tests.py @@ -7,13 +7,12 @@ class TestRunTests(unittest.TestCase): - - @patch('subprocess.run') + @patch("subprocess.run") def test_cmake_test(self, mock_subprocess_run): with tempfile.TemporaryDirectory() as temp_dir: - expected_file = os.path.join(temp_dir, 'build', 'sentinel.junit.xml') + expected_file = os.path.join(temp_dir, "build", "sentinel.junit.xml") os.chdir(temp_dir) - os.mkdir('build') # Simulate build dir in the genny repo. + os.mkdir("build") # Simulate build dir in the genny repo. def fail(*args, **kwargs): res = unittest.mock.Mock() diff --git a/src/python/gennylib/cedar_report.py b/src/python/gennylib/cedar_report.py index bde66389a8..4083ff50a5 100644 --- a/src/python/gennylib/cedar_report.py +++ b/src/python/gennylib/cedar_report.py @@ -12,56 +12,58 @@ from gennylib.parsers import cedar -CedarBucketConfig = namedtuple('CedarBucketConfig', [ - 'api_key', - 'api_secret', - 'api_token', - 'region', - 'name' -]) - -CedarTestArtifact = namedtuple('CedarTestArtifact', [ - 'bucket', - 'path', - 'tags', # [str] - 'local_path', - 'created_at', - 'convert_bson_to_ftdc', - 'prefix', - 'permissions' -]) - -CedarTestInfo = namedtuple('CedarTestInfo', [ - 'test_name', - 'trial', - 'tags', # [str] - 'args' # {str: str} -]) - -CedarTest = namedtuple('CedarTest', [ - 'info', # CedarTestInfo - 'created_at', - 'completed_at', - 'artifacts', # [CedarTestArtifact] - 'metrics', # unused - 'sub_tests' # unused -]) - -CedarReport = namedtuple('CedarReport', [ - 'project', - 'version', - 'order', - 'variant', - 'task_name', - 'task_id', - 'execution_number', - 'mainline', - 'tests', # [CedarTest] - 'bucket' # BucketConfig -]) - -DEFAULT_REPORT_FILE = 'cedar_report.json' -DEFAULT_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +CedarBucketConfig = namedtuple( + "CedarBucketConfig", ["api_key", "api_secret", "api_token", "region", "name"] +) + +CedarTestArtifact = namedtuple( + "CedarTestArtifact", + [ + "bucket", + "path", + "tags", # [str] + "local_path", + "created_at", + "convert_bson_to_ftdc", + "prefix", + "permissions", + ], +) + +CedarTestInfo = namedtuple( + "CedarTestInfo", ["test_name", "trial", "tags", "args"] # [str] # {str: str} +) + +CedarTest = namedtuple( + "CedarTest", + [ + "info", # CedarTestInfo + "created_at", + "completed_at", + "artifacts", # [CedarTestArtifact] + "metrics", # unused + "sub_tests", # unused + ], +) + +CedarReport = 
namedtuple( + "CedarReport", + [ + "project", + "version", + "order", + "variant", + "task_name", + "task_id", + "execution_number", + "mainline", + "tests", # [CedarTest] + "bucket", # BucketConfig + ], +) + +DEFAULT_REPORT_FILE = "cedar_report.json" +DEFAULT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" class _Config(object): @@ -71,31 +73,31 @@ class _Config(object): def __init__(self, env, metrics_file_names, test_run_time): # EVG related. - self.project = env['project'] - self.version = env['version_id'] - self.variant = env['build_variant'] - self.task_name = env['task_name'] - self.task_id = env['task_id'] - self.execution_number = int(env['execution']) + self.project = env["project"] + self.version = env["version_id"] + self.variant = env["build_variant"] + self.task_name = env["task_name"] + self.task_id = env["task_id"] + self.execution_number = int(env["execution"]) # This env var is either the string "true" or unset. - self.mainline = not (env.get('is_patch', '') == 'true') + self.mainline = not (env.get("is_patch", "") == "true") try: - self.order = int(env['revision_order_id']) + self.order = int(env["revision_order_id"]) except (ValueError, TypeError): self.order = None # We set these for convenience. - self.test_name = env['test_name'] + self.test_name = env["test_name"] self.metrics_file_names = metrics_file_names self.test_run_time = test_run_time self.now = datetime.datetime.utcnow() # AWS related. - self.api_key = env['terraform_key'] - self.api_secret = env['terraform_secret'] - self.cloud_region = 'us-east-1' # N. Virginia. - self.cloud_bucket = 'genny-metrics' + self.api_key = env["terraform_key"] + self.api_secret = env["terraform_secret"] + self.cloud_region = "us-east-1" # N. Virginia. + self.cloud_bucket = "genny-metrics" @property def created_at(self): @@ -105,7 +107,7 @@ def created_at(self): def build_report(config): sub_tests = [] - bucket_prefix = '{}_{}'.format(config.task_id, config.execution_number) + bucket_prefix = "{}_{}".format(config.task_id, config.execution_number) for path in config.metrics_file_names: base_name = os.path.basename(path) @@ -118,16 +120,11 @@ def build_report(config): local_path=path, created_at=config.created_at, convert_bson_to_ftdc=True, - permissions='public-read', - prefix=bucket_prefix + permissions="public-read", + prefix=bucket_prefix, ) - ti = CedarTestInfo( - test_name=test_name, - trial=0, - tags=[], - args={} - ) + ti = CedarTestInfo(test_name=test_name, trial=0, tags=[], args={}) t = CedarTest( info=ti._asdict(), @@ -135,12 +132,10 @@ def build_report(config): completed_at=config.now, artifacts=[a._asdict()], metrics=None, - sub_tests=None + sub_tests=None, ) sub_tests.append(t._asdict()) - - bucket_config = CedarBucketConfig( api_key=config.api_key, api_secret=config.api_secret, @@ -149,12 +144,7 @@ def build_report(config): name=config.cloud_bucket, ) - test_info = CedarTestInfo( - test_name=config.test_name, - trial=0, - tags=[], - args={} - ) + test_info = CedarTestInfo(test_name=config.test_name, trial=0, tags=[], args={}) test = CedarTest( info=test_info._asdict(), @@ -162,7 +152,7 @@ def build_report(config): completed_at=config.now, artifacts=[], metrics=None, - sub_tests=sub_tests + sub_tests=sub_tests, ) report = CedarReport( @@ -175,7 +165,7 @@ def build_report(config): execution_number=config.execution_number, mainline=config.mainline, tests=[test._asdict()], - bucket=bucket_config._asdict() + bucket=bucket_config._asdict(), ) return report._asdict() @@ -185,10 +175,7 @@ class CertRetriever(object): """Retrieves 
client certificate and key from the cedar API using Jira username and password.""" def __init__(self, username, password): - self.auth = json.dumps({ - 'username': username, - 'password': password - }) + self.auth = json.dumps({"username": username, "password": password}) @staticmethod def _fetch(url, output, **kwargs): @@ -196,7 +183,7 @@ def _fetch(url, output, **kwargs): return output resp = requests.get(url, **kwargs) resp.raise_for_status() - with open(output, 'w') as pem: + with open(output, "w") as pem: pem.write(resp.text) return output @@ -204,25 +191,27 @@ def root_ca(self): """ :return: the root cert authority pem file from cedar """ - return self._fetch('https://cedar.mongodb.com/rest/v1/admin/ca', 'cedar.ca.pem') + return self._fetch("https://cedar.mongodb.com/rest/v1/admin/ca", "cedar.ca.pem") def user_cert(self): """ :return: the user-level pem """ return self._fetch( - 'https://cedar.mongodb.com/rest/v1/admin/users/certificate', - 'cedar.user.crt', - data=self.auth) + "https://cedar.mongodb.com/rest/v1/admin/users/certificate", + "cedar.user.crt", + data=self.auth, + ) def user_key(self): """ :return: the user-level key """ return self._fetch( - 'https://cedar.mongodb.com/rest/v1/admin/users/certificate/key', - 'cedar.user.key', - data=self.auth) + "https://cedar.mongodb.com/rest/v1/admin/users/certificate/key", + "cedar.user.key", + data=self.auth, + ) class ShellCuratorRunner(object): @@ -241,18 +230,18 @@ def get_send_command(self): """ command = [ - 'curator', - 'poplar', - 'send', - '--service', - 'cedar.mongodb.com:7070', - '--cert', + "curator", + "poplar", + "send", + "--service", + "cedar.mongodb.com:7070", + "--cert", self.retriever.user_cert(), - '--key', + "--key", self.retriever.user_key(), - '--ca', + "--ca", self.retriever.root_ca(), - '--path', + "--path", self.report_file, ] return command @@ -271,11 +260,19 @@ def run(cmd): def build_parser(): parser = cedar.build_parser() parser.description += " and create a cedar report" - parser.add_argument('--report-file', default=DEFAULT_REPORT_FILE, help='path to generated report file') - parser.add_argument('--test-name', help='human friendly name for this test, defaults to the ' - 'EVG_task_name environment variable') - parser.add_argument('--expansions-file', default='expansions.yml', - help='expansions-file with configuration needed by cedar') + parser.add_argument( + "--report-file", default=DEFAULT_REPORT_FILE, help="path to generated report file" + ) + parser.add_argument( + "--test-name", + help="human friendly name for this test, defaults to the " + "EVG_task_name environment variable", + ) + parser.add_argument( + "--expansions-file", + default="expansions.yml", + help="expansions-file with configuration needed by cedar", + ) return parser @@ -301,32 +298,32 @@ def main__cedar_report(argv=sys.argv[1:], env=None, cert_retriever_cls=CertRetri args = parser.parse_args(argv) if not env: - with open(args.expansions_file, 'r') as f: + with open(args.expansions_file, "r") as f: env = yaml.safe_load(f) - if env.get('cedar_mode', '') == 'skip': + if env.get("cedar_mode", "") == "skip": return if args.test_name: - env['test_name'] = args.test_name + env["test_name"] = args.test_name else: - env['test_name'] = env['task_name'] + env["test_name"] = env["task_name"] metrics_file_names, test_run_time = cedar.run(args) config = _Config(env, metrics_file_names, test_run_time) report_dict = build_report(config) - with open(args.report_file, 'w') as f: + with open(args.report_file, "w") as f: json.dump(report_dict, f, 
cls=RFCDateTimeEncoder) - jira_user = env['perf_jira_user'] - jira_pwd = env['perf_jira_pw'] + jira_user = env["perf_jira_user"] + jira_pwd = env["perf_jira_pw"] cr = cert_retriever_cls(jira_user, jira_pwd) runner = ShellCuratorRunner(cr, args.report_file) runner.run(runner.get_send_command()) -if __name__ == '__main__': +if __name__ == "__main__": main__cedar_report() diff --git a/src/python/gennylib/genny_auto_tasks.py b/src/python/gennylib/genny_auto_tasks.py index ef02d471b9..112fb76dd3 100644 --- a/src/python/gennylib/genny_auto_tasks.py +++ b/src/python/gennylib/genny_auto_tasks.py @@ -14,7 +14,7 @@ from shrub.variant import TaskSpec -class AutoRunSpec(): +class AutoRunSpec: """ AutoRunSpec class encapsulates the 'AutoRun' section of a workload yaml file, if it exists """ @@ -34,10 +34,11 @@ def _rename_prepare_environment(prepare_environment_with: dict): return if "mongodb_setup" in prepare_environment_with: prepare_environment_with["setup"] = prepare_environment_with["mongodb_setup"] - del(prepare_environment_with["mongodb_setup"]) + del prepare_environment_with["mongodb_setup"] if len(prepare_environment_with) > 1: raise Exception( - f"Can only provide 'mongodb_setup' (or 'setup') in PrepareEnvironmentWith") + f"Can only provide 'mongodb_setup' (or 'setup') in PrepareEnvironmentWith" + ) @staticmethod def create_from_workload_yaml(workload_yaml): @@ -47,17 +48,19 @@ def create_from_workload_yaml(workload_yaml): """ if workload_yaml is None or not isinstance(workload_yaml, dict): return None - if 'AutoRun' not in workload_yaml or not isinstance(workload_yaml['AutoRun'], dict): + if "AutoRun" not in workload_yaml or not isinstance(workload_yaml["AutoRun"], dict): return None - autorun = workload_yaml['AutoRun'] + autorun = workload_yaml["AutoRun"] required_dict = None - if 'Requires' in workload_yaml['AutoRun'] and isinstance(autorun['Requires'], dict): - required_dict = autorun['Requires'] + if "Requires" in workload_yaml["AutoRun"] and isinstance(autorun["Requires"], dict): + required_dict = autorun["Requires"] prepare_environment_with = None - if 'PrepareEnvironmentWith' in autorun and isinstance(autorun['PrepareEnvironmentWith'], dict): - prepare_environment_with = autorun['PrepareEnvironmentWith'] + if "PrepareEnvironmentWith" in autorun and isinstance( + autorun["PrepareEnvironmentWith"], dict + ): + prepare_environment_with = autorun["PrepareEnvironmentWith"] return AutoRunSpec(required_dict, prepare_environment_with) @@ -67,14 +70,15 @@ def get_prepare_environment_vars(self, prepare_environment_vars_template): :return: A list of prepare_environment_var dicts, one for each setup. """ prepare_environment_vars = [] - setup = self.prepare_environment_with['setup'] + setup = self.prepare_environment_with["setup"] if setup is not None and isinstance(setup, list): for setup_var in setup: curr = prepare_environment_vars_template.copy() curr.update(self.prepare_environment_with) - curr['setup'] = setup_var - curr['test'] = "{task_name}_{setup_var}".format( - task_name=curr['test'], setup_var=to_snake_case(setup_var)) + curr["setup"] = setup_var + curr["test"] = "{task_name}_{setup_var}".format( + task_name=curr["test"], setup_var=to_snake_case(setup_var) + ) prepare_environment_vars.append(curr) else: curr = prepare_environment_vars.copy() @@ -89,9 +93,9 @@ def to_snake_case(str): From: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case :return: snake_case version of str. 
""" - s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', str) - s2 = re.sub('-', '_', s1) - return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s2).lower() + s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", str) + s2 = re.sub("-", "_", s1) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s2).lower() def cd_genny_root(): @@ -111,7 +115,7 @@ def get_project_root(): :return: the path of the project root. """ try: - out = subprocess.check_output('git rev-parse --show-toplevel', shell=True) + out = subprocess.check_output("git rev-parse --show-toplevel", shell=True) except subprocess.CalledProcessError as e: print(e.output, file=sys.stderr) raise e @@ -127,17 +131,19 @@ def modified_workload_files(): try: # Returns the names of files in src/workloads/ that have been added/modified/renamed since the common ancestor of HEAD and origin/master out = subprocess.check_output( - 'git diff --name-only --diff-filter=AMR $(git merge-base HEAD origin/master) -- src/workloads/', shell=True) + "git diff --name-only --diff-filter=AMR $(git merge-base HEAD origin/master) -- src/workloads/", + shell=True, + ) except subprocess.CalledProcessError as e: print(e.output, file=sys.stderr) raise e - if out.decode() == '': + if out.decode() == "": return [] # Make paths relative to workloads/ e.g. src/workloads/scale/NewTask.yml --> scale/NewTask.yml - short_filenames = [f.split('workloads/', 1)[1] for f in out.decode().strip().split('\n')] - short_filenames = list(filter(lambda x: x.endswith('.yml'), short_filenames)) + short_filenames = [f.split("workloads/", 1)[1] for f in out.decode().strip().split("\n")] + short_filenames = list(filter(lambda x: x.endswith(".yml"), short_filenames)) return short_filenames @@ -175,17 +181,17 @@ def autorun_workload_files(env_dict): :param dict env_dict: a dict representing the values from bootstrap.yml and runtime.yml -- the output of make_env_dict(). :return: a list of workload files whose AutoRun critera are met by the env_dict. """ - workload_dir = '{}/src/workloads'.format(get_project_root()) - candidates = glob.glob('{}/**/*.yml'.format(workload_dir), recursive=True) + workload_dir = "{}/src/workloads".format(get_project_root()) + candidates = glob.glob("{}/**/*.yml".format(workload_dir), recursive=True) matching_files = [] for fname in candidates: - with open(fname, 'r') as handle: + with open(fname, "r") as handle: workload_dict = yaml.safe_load(handle) autorun_spec = AutoRunSpec.create_from_workload_yaml(workload_dict) if workload_should_autorun(autorun_spec, env_dict): - matching_files.append(fname.split('/src/workloads/')[1]) + matching_files.append(fname.split("/src/workloads/")[1]) return matching_files @@ -195,34 +201,34 @@ def make_env_dict(dirname): :param str dir: the directory in which to look for bootstrap.yml and runtime.yml files. 
:return: a dict representation of bootstrap.yml and runtime.yml in the cwd, with top level keys 'bootstrap' and 'runtime' """ - env_files = ['bootstrap.yml', 'runtime.yml'] + env_files = ["bootstrap.yml", "runtime.yml"] env_dict = {} for fname in env_files: fname = os.path.join(dirname, fname) if not os.path.isfile(fname): return None - with open(fname, 'r') as handle: + with open(fname, "r") as handle: config = yaml.safe_load(handle) if config is None: return None - module = os.path.basename(fname).split('.yml')[0] + module = os.path.basename(fname).split(".yml")[0] env_dict[module] = config return env_dict def validate_user_workloads(workloads): if len(workloads) == 0: - return ['No workloads specified'] + return ["No workloads specified"] errors = [] genny_root = get_project_root() for w in workloads: - workload_path = '{}/src/workloads/{}'.format(genny_root, w) + workload_path = "{}/src/workloads/{}".format(genny_root, w) if not os.path.isfile(workload_path): - errors.append('no file at path {}'.format(workload_path)) + errors.append("no file at path {}".format(workload_path)) continue - if not workload_path.endswith('.yml'): - errors.append('{} is not a .yml workload file'.format(workload_path)) + if not workload_path.endswith(".yml"): + errors.append("{} is not a .yml workload file".format(workload_path)) return errors @@ -234,24 +240,24 @@ def get_prepare_environment_vars(task_name, fname): :return: A list of environment var dictionaries, one for each mongodb_setup used. """ autorun_spec = None - prepare_environment_vars_template = { - 'test': task_name, - 'auto_workload_path': fname - } + prepare_environment_vars_template = {"test": task_name, "auto_workload_path": fname} - full_filename = '{}/src/workloads/{}'.format(get_project_root(), fname) - with open(full_filename, 'r') as handle: + full_filename = "{}/src/workloads/{}".format(get_project_root(), fname) + with open(full_filename, "r") as handle: workload_dict = yaml.safe_load(handle) autorun_spec = AutoRunSpec.create_from_workload_yaml(workload_dict) prepare_environment_vars = [] if autorun_spec is not None and autorun_spec.prepare_environment_with is not None: - prepare_environment_vars = autorun_spec.get_prepare_environment_vars(prepare_environment_vars_template) + prepare_environment_vars = autorun_spec.get_prepare_environment_vars( + prepare_environment_vars_template + ) else: prepare_environment_vars.append(prepare_environment_vars_template) return prepare_environment_vars + def construct_all_tasks_json(): """ :return: json representation of tasks for all workloads in the /src/workloads directory relative to the genny root. @@ -259,30 +265,32 @@ def construct_all_tasks_json(): c = Configuration() c.exec_timeout(64800) # 18 hours - workload_dir = '{}/src/workloads'.format(get_project_root()) - all_workloads = glob.glob('{}/**/*.yml'.format(workload_dir), recursive=True) - all_workloads = [s.split('/src/workloads/')[1] for s in all_workloads] + workload_dir = "{}/src/workloads".format(get_project_root()) + all_workloads = glob.glob("{}/**/*.yml".format(workload_dir), recursive=True) + all_workloads = [s.split("/src/workloads/")[1] for s in all_workloads] for fname in all_workloads: basename = os.path.basename(fname) base_parts = os.path.splitext(basename) - if base_parts[1] != '.yml': + if base_parts[1] != ".yml": # Not a .yml workload file, ignore it. 
continue task_name = to_snake_case(base_parts[0]) - + prepare_environment_vars = get_prepare_environment_vars(task_name, fname) - + for prep_var in prepare_environment_vars: - t = c.task(prep_var['test']) + t = c.task(prep_var["test"]) t.priority(5) # The default priority in system_perf.yml - t.commands([ - CommandDefinition().function('prepare environment').vars(prep_var), - CommandDefinition().function('deploy cluster'), - CommandDefinition().function('run test'), - CommandDefinition().function('analyze'), - ]) + t.commands( + [ + CommandDefinition().function("prepare environment").vars(prep_var), + CommandDefinition().function("deploy cluster"), + CommandDefinition().function("run test"), + CommandDefinition().function("analyze"), + ] + ) return c.to_json() @@ -300,15 +308,15 @@ def construct_variant_json(workloads, variants): for fname in workloads: basename = os.path.basename(fname) base_parts = os.path.splitext(basename) - if base_parts[1] != '.yml': + if base_parts[1] != ".yml": # Not a .yml workload file, ignore it. continue task_name = to_snake_case(base_parts[0]) - prepare_environment_vars = get_prepare_environment_vars(task_name, fname) + prepare_environment_vars = get_prepare_environment_vars(task_name, fname) for prep_var in prepare_environment_vars: - task_specs.append(TaskSpec(prep_var['test'])) + task_specs.append(TaskSpec(prep_var["test"])) for v in variants: c.variant(v).tasks(task_specs) @@ -324,28 +332,49 @@ def main(): These two invocations are separate to ensure that no task will be generated more than once (which would cause generate.tasks to throw an error, even across different buildvariants). """ parser = argparse.ArgumentParser( - description="Generates json that can be used as input to evergreen's generate.tasks, representing genny workloads to be run") + description="Generates json that can be used as input to evergreen's generate.tasks, representing genny workloads to be run" + ) op_group = parser.add_mutually_exclusive_group(required=True) - op_group.add_argument('--variants', nargs='+', help='buildvariants that workloads should run on') - op_group.add_argument('--generate-all-tasks', action='store_true', - help='generates JSON task definitions for all genny workloads, does not attach them to any buildvariants') + op_group.add_argument( + "--variants", nargs="+", help="buildvariants that workloads should run on" + ) + op_group.add_argument( + "--generate-all-tasks", + action="store_true", + help="generates JSON task definitions for all genny workloads, does not attach them to any buildvariants", + ) workload_group = parser.add_mutually_exclusive_group() - workload_group.add_argument('--autorun', action='store_true', default=False, - help='if set, the script will generate tasks based on workloads\' AutoRun section and bootstrap.yml/runtime.yml files in the working directory') - workload_group.add_argument('--modified', action='store_true', default=False, - help='if set, the script will generate tasks for workloads that have been added/modifed locally, relative to origin/master') - parser.add_argument('--forced-workloads', nargs='+', - help='paths of workloads to run, relative to src/workloads/ in the genny repository root') - parser.add_argument('-o', '--output', default='build/generated_tasks.json', - help='path of file to output result json to, relative to the genny root directory') + workload_group.add_argument( + "--autorun", + action="store_true", + default=False, + help="if set, the script will generate tasks based on workloads' AutoRun section and 
bootstrap.yml/runtime.yml files in the working directory", + ) + workload_group.add_argument( + "--modified", + action="store_true", + default=False, + help="if set, the script will generate tasks for workloads that have been added/modifed locally, relative to origin/master", + ) + parser.add_argument( + "--forced-workloads", + nargs="+", + help="paths of workloads to run, relative to src/workloads/ in the genny repository root", + ) + parser.add_argument( + "-o", + "--output", + default="build/generated_tasks.json", + help="path of file to output result json to, relative to the genny root directory", + ) args = parser.parse_args(sys.argv[1:]) if args.generate_all_tasks and (args.autorun or args.modified): - parser.error('arguments --autorun and --modified not allowed with --generate-all-tasks') + parser.error("arguments --autorun and --modified not allowed with --generate-all-tasks") if args.variants and not (args.autorun or args.modified or args.forced_workloads): - parser.error('either --autorun, --modified, or --forced-workloads required with --variants') + parser.error("either --autorun, --modified, or --forced-workloads required with --variants") original_cwd = os.getcwd() cd_genny_root() @@ -357,21 +386,24 @@ def main(): env_dict = make_env_dict(original_cwd) if env_dict is None: print( - 'fatal error: bootstrap.yml and runtime.yml files not found in current directory, cannot AutoRun workloads\n\ - note: --autorun is intended to be called from within Evergreen. If using genny locally, please run the workload directly.', - file=sys.stderr) + "fatal error: bootstrap.yml and runtime.yml files not found in current directory, cannot AutoRun workloads\n\ + note: --autorun is intended to be called from within Evergreen. If using genny locally, please run the workload directly.", + file=sys.stderr, + ) print(os.getcwd(), file=sys.stderr) return workloads = autorun_workload_files(env_dict) if len(workloads) == 0: - print('No AutoRun workloads found matching environment, generating no tasks.') + print("No AutoRun workloads found matching environment, generating no tasks.") elif args.modified: workloads = modified_workload_files() if len(workloads) == 0 and args.forced_workloads is None: - raise Exception('No modified workloads found.\n\ + raise Exception( + "No modified workloads found.\n\ No results from command: git diff --name-only --diff-filter=AMR $(git merge-base HEAD origin/master) -- ../workloads/\n\ - Ensure that any added/modified workloads have been committed locally.') + Ensure that any added/modified workloads have been committed locally." + ) else: workloads = [] @@ -379,24 +411,24 @@ def main(): errs = validate_user_workloads(args.forced_workloads) if len(errs) > 0: for e in errs: - print('invalid workload: {}'.format(e), file=sys.stderr) + print("invalid workload: {}".format(e), file=sys.stderr) return workloads.extend(args.forced_workloads) output_json = construct_variant_json(workloads, args.variants) - if args.output == 'stdout': + if args.output == "stdout": print(output_json) return # Interpret args.output relative to the genny root directory. 
project_root = get_project_root() - output_path = '{}/{}'.format(project_root, args.output) - with open(output_path, 'w') as output_file: + output_path = "{}/{}".format(project_root, args.output) + with open(output_path, "w") as output_file: output_file.write(output_json) - print('Wrote generated JSON to {}'.format(output_path)) + print("Wrote generated JSON to {}".format(output_path)) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/python/gennylib/genny_runner.py b/src/python/gennylib/genny_runner.py index d983423dc9..ff6481b940 100644 --- a/src/python/gennylib/genny_runner.py +++ b/src/python/gennylib/genny_runner.py @@ -3,6 +3,7 @@ import subprocess from contextlib import contextmanager + def get_genny_args(): """ Returns the argument list used to create the Genny process. @@ -11,14 +12,15 @@ def get_genny_args(): Otherwise we search the PATH. """ args = sys.argv - genny_core = 'genny_core' + genny_core = "genny_core" - local_core = './dist/bin/genny_core' + local_core = "./dist/bin/genny_core" if os.path.exists(local_core): genny_core = local_core args[0] = genny_core return args + def get_poplar_args(): """ Returns the argument list used to create the Poplar gRPC process. @@ -26,12 +28,13 @@ def get_poplar_args(): If we are in the root of the genny repo, use the local executable. Otherwise we search the PATH. """ - curator = 'curator' + curator = "curator" - local_curator = './curator/curator' + local_curator = "./curator/curator" if os.path.exists(local_curator): curator = local_curator - return [curator, 'poplar', 'grpc'] + return [curator, "poplar", "grpc"] + @contextmanager def poplar_grpc(): @@ -45,7 +48,7 @@ def poplar_grpc(): try: poplar.terminate() exit_code = poplar.wait(timeout=10) - if exit_code not in (0, -15): # Termination or exit. + if exit_code not in (0, -15): # Termination or exit. raise OSError("Poplar exited with code: {code}.".format(code=(exit_code))) except: @@ -54,15 +57,16 @@ def poplar_grpc(): poplar.kill() raise - + def main_genny_runner(): """ Intended to be the main entry point for running Genny. 
""" - + with poplar_grpc(): res = subprocess.run(get_genny_args()) res.check_returncode() - -if __name__ == '__main__': + + +if __name__ == "__main__": main_genny_runner() diff --git a/src/python/gennylib/legacy_report.py b/src/python/gennylib/legacy_report.py index 7a85d7229e..8c78e95aea 100644 --- a/src/python/gennylib/legacy_report.py +++ b/src/python/gennylib/legacy_report.py @@ -10,10 +10,13 @@ def build_parser(): - parser = argparse.ArgumentParser(description='Convert cedar-csv output into legacy perf.json' - 'report file format') - parser.add_argument('--report-file', default='perf.json', help='path to the perf.json report file') - parser.add_argument('input_file', metavar='input-file', help='path to genny csv2 perf data') + parser = argparse.ArgumentParser( + description="Convert cedar-csv output into legacy perf.json" "report file format" + ) + parser.add_argument( + "--report-file", default="perf.json", help="path to the perf.json report file" + ) + parser.add_argument("input_file", metavar="input-file", help="path to genny csv2 perf data") return parser @@ -34,29 +37,31 @@ def finalize(self): def _asdict(self): # Name derives from the _asdict method of colllections.namedtuple return { - 'started': self.started, - 'ended': self.ended, - 'ops_per_ns': self.ops_per_ns, - 'threads': self.threads + "started": self.started, + "ended": self.ended, + "ops_per_ns": self.ops_per_ns, + "threads": self.threads, } def _translate_to_perf_json(timers): out = [] for name, timer in timers.items(): - out.append({ - 'name': name, - 'workload': name, - 'start': timer['started'] / 100000, - 'end': timer['ended'] / 100000, - 'results': { - len(timer['threads']): { - 'ops_per_sec': timer['ops_per_ns'] * 1e9, - 'ops_per_sec_values': [timer['ops_per_ns'] * 1e9] - } + out.append( + { + "name": name, + "workload": name, + "start": timer["started"] / 100000, + "end": timer["ended"] / 100000, + "results": { + len(timer["threads"]): { + "ops_per_sec": timer["ops_per_ns"] * 1e9, + "ops_per_sec_values": [timer["ops_per_ns"] * 1e9], + } + }, } - }) - return {'results': out} + ) + return {"results": out} def run(args): @@ -70,7 +75,7 @@ def run(args): report = None for line, actor in data_reader: - cur_metric_name = '{}-{}'.format(actor, line[IntermediateCSVColumns.OPERATION]) + cur_metric_name = "{}-{}".format(actor, line[IntermediateCSVColumns.OPERATION]) if cur_metric_name != metric_name: if report: @@ -92,7 +97,7 @@ def run(args): iterations = iterations + 1 if iterations % 1e6 == 0: - logging.info('Processed %d metrics lines', iterations) + logging.info("Processed %d metrics lines", iterations) if report: # pylint: disable=protected-access @@ -100,7 +105,7 @@ def run(args): result = _translate_to_perf_json(timers) - with open(args.report_file, 'w') as f: + with open(args.report_file, "w") as f: json.dump(result, f) diff --git a/src/python/gennylib/parsers/cedar.py b/src/python/gennylib/parsers/cedar.py index a3f17a56b2..b87e5fd68f 100644 --- a/src/python/gennylib/parsers/cedar.py +++ b/src/python/gennylib/parsers/cedar.py @@ -123,23 +123,42 @@ def _parse_into_cedar_format(self, line): ts = datetime.utcfromtimestamp(line[IntermediateCSVColumns.UNIX_TIME] / 1000) op = line[IntermediateCSVColumns.OPERATION] - res = OrderedDict([ - ('ts', ts), - ('id', Int64(line[IntermediateCSVColumns.THREAD])), - ('counters', OrderedDict([ - ('n', Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.N])), - ('ops', Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.OPS])), - ('size', 
Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.SIZE])), - ('errors', Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.ERRORS])) - ])), - ('timers', OrderedDict([ - ('duration', Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.DURATION])), - ('total', Int64(self.total_for_op[op])) - ])), - ('gauges', OrderedDict([ - ('workers', Int64(line[IntermediateCSVColumns.WORKERS])) - ])) - ]) + res = OrderedDict( + [ + ("ts", ts), + ("id", Int64(line[IntermediateCSVColumns.THREAD])), + ( + "counters", + OrderedDict( + [ + ("n", Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.N])), + ("ops", Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.OPS])), + ( + "size", + Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.SIZE]), + ), + ( + "errors", + Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.ERRORS]), + ), + ] + ), + ), + ( + "timers", + OrderedDict( + [ + ( + "duration", + Int64(self.cumulatives_for_op[op][IntermediateCSVColumns.DURATION]), + ), + ("total", Int64(self.total_for_op[op])), + ] + ), + ), + ("gauges", OrderedDict([("workers", Int64(line[IntermediateCSVColumns.WORKERS]))])), + ] + ) return res, op @@ -163,7 +182,7 @@ def split_into_actor_operation_and_transform_to_cedar_format(file_name, out_dir) for ordered_dict, op in IntermediateCSVReader(reader): out_file_name = pjoin(out_dir, "{}-{}.bson".format(actor_name, op)) if out_file_name not in out_files: - out_files[out_file_name] = open(out_file_name, 'wb') + out_files[out_file_name] = open(out_file_name, "wb") out_files[out_file_name].write(BSON.encode(ordered_dict)) finally: for fh in out_files.values(): @@ -193,9 +212,9 @@ def split_into_actor_csv_files(data_reader, out_dir): cur_out_fh.close() # Open new csv file. - file_name = actor + '.csv' + file_name = actor + ".csv" output_files.append(file_name) - cur_out_fh = open(pjoin(out_dir, file_name), 'w', newline='') + cur_out_fh = open(pjoin(out_dir, file_name), "w", newline="") # Quote non-numeric values so they get converted to float automatically cur_out_csv = csv.writer(cur_out_fh, quoting=csv.QUOTE_NONNUMERIC) @@ -205,7 +224,7 @@ def split_into_actor_csv_files(data_reader, out_dir): counter += 1 if counter % 1e6 == 0: - print('Parsed {} metrics'.format(counter)) + print("Parsed {} metrics".format(counter)) if cur_out_fh is not None: cur_out_fh.close() @@ -215,20 +234,23 @@ def split_into_actor_csv_files(data_reader, out_dir): def sort_csv_file(file_name, out_dir): # Sort on Timestamp and Thread. 
- csvsort(pjoin(out_dir, file_name), - [IntermediateCSVColumns.TS, IntermediateCSVColumns.THREAD], - quoting=csv.QUOTE_NONNUMERIC, - has_header=True, - show_progress=True) + csvsort( + pjoin(out_dir, file_name), + [IntermediateCSVColumns.TS, IntermediateCSVColumns.THREAD], + quoting=csv.QUOTE_NONNUMERIC, + has_header=True, + show_progress=True, + ) def build_parser(): parser = argparse.ArgumentParser( - description='Convert Genny csv2 perf data to Cedar BSON format', + description="Convert Genny csv2 perf data to Cedar BSON format" + ) + parser.add_argument("input_file", metavar="input-file", help="path to genny csv2 perf data") + parser.add_argument( + "output_dir", metavar="output-dir", help="directory to store output BSON files" ) - parser.add_argument('input_file', metavar='input-file', help='path to genny csv2 perf data') - parser.add_argument('output_dir', metavar='output-dir', - help='directory to store output BSON files') return parser @@ -252,13 +274,14 @@ def run(args): files = split_into_actor_csv_files(data_reader, out_dir) for f in files: - logging.info('Processing metrics in file %s', f) + logging.info("Processing metrics in file %s", f) # csvsort by timestamp, thread sort_csv_file(f, out_dir) # compute cumulative and stream output to bson file metrics_file_names.extend( - split_into_actor_operation_and_transform_to_cedar_format(f, out_dir)) + split_into_actor_operation_and_transform_to_cedar_format(f, out_dir) + ) return metrics_file_names, my_csv2.approximate_test_run_time diff --git a/src/python/gennylib/parsers/csv2.py b/src/python/gennylib/parsers/csv2.py index 658c3d5198..939c0d6527 100644 --- a/src/python/gennylib/parsers/csv2.py +++ b/src/python/gennylib/parsers/csv2.py @@ -33,6 +33,7 @@ class CSVColumns(object): """ Object oriented access to csv header/column mapping. """ + _COLUMNS = None @classmethod @@ -45,10 +46,10 @@ def add_column(cls, col_name, col_index): upper_col_name = col_name.upper() # Python class constants should be uppercase. if not hasattr(cls, upper_col_name): - raise CSV2ParsingError('%s doesn\'t have column %s', cls.__name__, col_name) + raise CSV2ParsingError("%s doesn't have column %s", cls.__name__, col_name) if not isinstance(cls._COLUMNS, set): - raise ValueError('Subclass must have the class property `_COLUMN = set()`') + raise ValueError("Subclass must have the class property `_COLUMN = set()`") setattr(cls, upper_col_name, col_index) cls._COLUMNS.add(upper_col_name) @@ -90,8 +91,9 @@ def _parse_into_intermediate_csv(self, line): # Eagerly error if OUTCOME is > 1 if line[_OpColumns.OUTCOME] > 1: - raise CSV2ParsingError('Unexpected outcome on line %d: %s', self.raw_reader.line_num, - line) + raise CSV2ParsingError( + "Unexpected outcome on line %d: %s", self.raw_reader.line_num, line + ) op = line[_OpColumns.OPERATION] actor = line[_OpColumns.ACTOR] @@ -172,7 +174,7 @@ def __init__(self, csv2_file_name): self._csv2_file_name = csv2_file_name def metric_to_system_time_ns(self, metric_time): - assert self._unix_epoch_offset_ns, 'test run time not available yet' + assert self._unix_epoch_offset_ns, "test run time not available yet" return metric_time + self._unix_epoch_offset_ns @property @@ -183,30 +185,30 @@ def approximate_test_run_time(self): :raises: AssertionError if the offset has not been set yet. :return: datetime.timedelta instance representing the run time. 
""" - assert self._metrics_time_ns, 'test run time not available yet' + assert self._metrics_time_ns, "test run time not available yet" return datetime.timedelta(microseconds=(self._metrics_time_ns / 1000)) @contextlib.contextmanager def data_reader(self): # parsers for newline-delimited sections in genny's csv2 file. header_parsers = { - 'Clocks': self._parse_clocks, - 'OperationThreadCounts': self._parse_thread_count, - 'Operations': self._parse_operations + "Clocks": self._parse_clocks, + "OperationThreadCounts": self._parse_thread_count, + "Operations": self._parse_operations, } - with open(self._csv2_file_name, 'r') as csv2_file: + with open(self._csv2_file_name, "r") as csv2_file: try: reader = csv.reader(csv2_file, dialect=_Dialect) while True: title = next(reader)[0] if title not in header_parsers: - raise CSV2ParsingError('Unknown csv2 section title %s', title) + raise CSV2ParsingError("Unknown csv2 section title %s", title) should_stop = header_parsers[title](reader) if should_stop: break except (IndexError, ValueError) as e: - raise CSV2ParsingError('Error parsing CSV file: ', self._csv2_file_name) from e + raise CSV2ParsingError("Error parsing CSV file: ", self._csv2_file_name) from e yield self._data_reader @@ -215,17 +217,14 @@ def _parse_clocks(self, reader): line = next(reader) - times = { - 'SystemTime': None, - 'MetricsTime': None - } + times = {"SystemTime": None, "MetricsTime": None} while line: times[line[_ClockColumns.CLOCK]] = int(line[_ClockColumns.NANOSECONDS]) line = next(reader) - self._unix_epoch_offset_ns = times['SystemTime'] - times['MetricsTime'] - self._metrics_time_ns = times['MetricsTime'] + self._unix_epoch_offset_ns = times["SystemTime"] - times["MetricsTime"] + self._metrics_time_ns = times["MetricsTime"] return False @@ -244,8 +243,9 @@ def _parse_thread_count(self, reader): def _parse_operations(self, reader): _OpColumns.add_columns([h.strip() for h in next(reader)]) - self._data_reader = _DataReader(reader, self._operation_thread_count_map, - self._unix_epoch_offset_ns) + self._data_reader = _DataReader( + reader, self._operation_thread_count_map, self._unix_epoch_offset_ns + ) return True @@ -272,5 +272,16 @@ def default_columns(cls): Ordered list of default columns to write to the CSV, must match the column names in the class attributes. 
""" - return ['unix_time', 'ts', 'thread', 'operation', 'duration', 'outcome', 'n', - 'ops', 'errors', 'size', 'workers'] + return [ + "unix_time", + "ts", + "thread", + "operation", + "duration", + "outcome", + "n", + "ops", + "errors", + "size", + "workers", + ] diff --git a/src/python/gennylib/yaml_linter.py b/src/python/gennylib/yaml_linter.py index 74f541bfbf..64d12b434e 100644 --- a/src/python/gennylib/yaml_linter.py +++ b/src/python/gennylib/yaml_linter.py @@ -9,53 +9,52 @@ def main(): logging.basicConfig(level=logging.INFO) - if not path.exists('.genny-root'): - logging.error('Please run this script from the root of the Genny repository') + if not path.exists(".genny-root"): + logging.error("Please run this script from the root of the Genny repository") sys.exit(1) yaml_dirs = [ - path.join(os.getcwd(), 'src', 'workloads'), - path.join(os.getcwd(), 'src', 'phases'), - path.join(os.getcwd(), 'src', 'resmokeconfig') + path.join(os.getcwd(), "src", "workloads"), + path.join(os.getcwd(), "src", "phases"), + path.join(os.getcwd(), "src", "resmokeconfig"), ] - yaml_files = [ - path.join(os.getcwd(), 'evergreen.yml') - ] + yaml_files = [path.join(os.getcwd(), "evergreen.yml")] has_error = False for yaml_dir in yaml_dirs: for dirpath, dirnames, filenames in os.walk(yaml_dir): for filename in filenames: - if filename.endswith('.yaml'): - logging.error('All YAML files should have the .yml extension, found %s', filename) + if filename.endswith(".yaml"): + logging.error( + "All YAML files should have the .yml extension, found %s", filename + ) # Don't error immediately so all violations can be printed with one run # of this script. has_error = True - elif filename.endswith('.yml'): + elif filename.endswith(".yml"): yaml_files.append(path.join(dirpath, filename)) if has_error: sys.exit(1) if len(yaml_files) == 0: - logging.error('Did not find any YAML files to lint in the directories: %s', ' '.join(yaml_dirs)) + logging.error( + "Did not find any YAML files to lint in the directories: %s", " ".join(yaml_dirs) + ) sys.exit(1) - config_file_path = path.join(os.getcwd(), '.yamllint') + config_file_path = path.join(os.getcwd(), ".yamllint") - yamllint_argv = sys.argv[1:] + [ - '--strict', - '--config-file', config_file_path, - ] + yaml_files + yamllint_argv = sys.argv[1:] + ["--strict", "--config-file", config_file_path] + yaml_files - print('Linting {} Genny workload YAML files with yamllint'.format(len(yaml_files))) + print("Linting {} Genny workload YAML files with yamllint".format(len(yaml_files))) - logging.debug('Invoking yamllint with the following command: '.join(yamllint_argv)) + logging.debug("Invoking yamllint with the following command: ".join(yamllint_argv)) yamllint.cli.run(yamllint_argv) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/src/python/setup.py b/src/python/setup.py index fa967dc147..336e739373 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -14,32 +14,28 @@ from setuptools import setup -setup(name='genny', - version='1.0', - packages=[ - 'gennylib', - 'gennylib.parsers', - 'third_party' - ], - install_requires=[ - 'nose==1.3.7', - 'yapf==0.24.0', - 'pymongo==3.7.2', - 'PyYAML==5.1', - 'requests==2.21.0', - 'yamllint==1.15.0', - 'shrub.py==0.2.3' - ], - setup_requires=[ - 'nose==1.3.7' - ], - entry_points={ - 'console_scripts': [ - 'genny-metrics-report = gennylib.cedar_report:main__cedar_report', - 'genny-metrics-legacy-report = gennylib.legacy_report:main__legacy_report', - 'lint-yaml = 
gennylib.yaml_linter:main', - 'genny = gennylib.genny_runner:main_genny_runner', - 'genny-auto-tasks = gennylib.genny_auto_tasks:main' - ] - }, - ) +setup( + name="genny", + version="1.0", + packages=["gennylib", "gennylib.parsers", "third_party"], + install_requires=[ + "nose==1.3.7", + "yapf==0.24.0", + "pymongo==3.7.2", + "PyYAML==5.1", + "requests==2.21.0", + "yamllint==1.15.0", + "shrub.py==0.2.3", + "black==19.10b0", + ], + setup_requires=["nose==1.3.7"], + entry_points={ + "console_scripts": [ + "genny-metrics-report = gennylib.cedar_report:main__cedar_report", + "genny-metrics-legacy-report = gennylib.legacy_report:main__legacy_report", + "lint-yaml = gennylib.yaml_linter:main", + "genny = gennylib.genny_runner:main_genny_runner", + "genny-auto-tasks = gennylib.genny_auto_tasks:main", + ] + }, +) diff --git a/src/python/tests/auto_tasks_test.py b/src/python/tests/auto_tasks_test.py index 812ac2e9d3..5a7eb9adbe 100644 --- a/src/python/tests/auto_tasks_test.py +++ b/src/python/tests/auto_tasks_test.py @@ -14,9 +14,8 @@ class AutoTasksTest(unittest.TestCase): - - @patch('gennylib.genny_auto_tasks.open', new_callable=mock_open, read_data='') - @patch('glob.glob') + @patch("gennylib.genny_auto_tasks.open", new_callable=mock_open, read_data="") + @patch("glob.glob") def test_construct_all_tasks_json(self, mock_glob, mock_open): """ This test runs construct_all_tasks_json with static workloads @@ -24,61 +23,63 @@ def test_construct_all_tasks_json(self, mock_glob, mock_open): the generated json is what evergreen will expect to generate the correct tasks. """ - mock_glob.return_value = ["genny/src/workloads/scale/NewWorkload.yml", - "genny/src/workloads/subdir1/subdir2/subdir3/NestedTest.yml", - "/the/full/path/to/genny/src/workloads/execution/ExecutionTask.yml"] + mock_glob.return_value = [ + "genny/src/workloads/scale/NewWorkload.yml", + "genny/src/workloads/subdir1/subdir2/subdir3/NestedTest.yml", + "/the/full/path/to/genny/src/workloads/execution/ExecutionTask.yml", + ] expected_json = { - 'tasks': [ + "tasks": [ { - 'name': 'new_workload', - 'commands': [ + "name": "new_workload", + "commands": [ { - 'func': 'prepare environment', - 'vars': { - 'test': 'new_workload', - 'auto_workload_path': 'scale/NewWorkload.yml' - } + "func": "prepare environment", + "vars": { + "test": "new_workload", + "auto_workload_path": "scale/NewWorkload.yml", + }, }, - {'func': 'deploy cluster'}, - {'func': 'run test'}, - {'func': 'analyze'} + {"func": "deploy cluster"}, + {"func": "run test"}, + {"func": "analyze"}, ], - 'priority': 5 + "priority": 5, }, { - 'name': 'nested_test', - 'commands': [ + "name": "nested_test", + "commands": [ { - 'func': 'prepare environment', - 'vars': { - 'test': 'nested_test', - 'auto_workload_path': 'subdir1/subdir2/subdir3/NestedTest.yml' - } + "func": "prepare environment", + "vars": { + "test": "nested_test", + "auto_workload_path": "subdir1/subdir2/subdir3/NestedTest.yml", + }, }, - {'func': 'deploy cluster'}, - {'func': 'run test'}, - {'func': 'analyze'} + {"func": "deploy cluster"}, + {"func": "run test"}, + {"func": "analyze"}, ], - 'priority': 5 + "priority": 5, }, { - 'name': 'execution_task', - 'commands': [ + "name": "execution_task", + "commands": [ { - 'func': 'prepare environment', - 'vars': { - 'test': 'execution_task', - 'auto_workload_path': 'execution/ExecutionTask.yml' - } + "func": "prepare environment", + "vars": { + "test": "execution_task", + "auto_workload_path": "execution/ExecutionTask.yml", + }, }, - {'func': 'deploy cluster'}, - {'func': 'run 
test'}, - {'func': 'analyze'} + {"func": "deploy cluster"}, + {"func": "run test"}, + {"func": "analyze"}, ], - 'priority': 5 + "priority": 5, }, ], - 'timeout': 64800, + "timeout": 64800, } actual_json_str = construct_all_tasks_json() @@ -86,9 +87,9 @@ def test_construct_all_tasks_json(self, mock_glob, mock_open): self.assertDictEqual(expected_json, actual_json) - @patch('gennylib.genny_auto_tasks.open', new_callable=mock_open, read_data='') - @patch('yaml.safe_load') - @patch('glob.glob') + @patch("gennylib.genny_auto_tasks.open", new_callable=mock_open, read_data="") + @patch("yaml.safe_load") + @patch("glob.glob") def test_construct_all_tasks_json_multiple_setups(self, mock_glob, mock_safe_load, mock_open): """ Makes sure that the code works when generating tasks with multiple setups. @@ -96,52 +97,46 @@ def test_construct_all_tasks_json_multiple_setups(self, mock_glob, mock_safe_loa mock_glob.return_value = ["genny/src/workloads/scale/MultipleSetups.yml"] mock_safe_load.return_value = { - 'AutoRun': { - 'PrepareEnvironmentWith': { - 'mongodb_setup': ['first', 'second'] - } - } + "AutoRun": {"PrepareEnvironmentWith": {"mongodb_setup": ["first", "second"]}} } expected_json = { - 'tasks': [ + "tasks": [ { - 'name': 'multiple_setups_first', - 'commands': [ + "name": "multiple_setups_first", + "commands": [ { - 'func': 'prepare environment', - 'vars': { - 'test': 'multiple_setups_first', - 'auto_workload_path': 'scale/MultipleSetups.yml', - 'setup': 'first' - } + "func": "prepare environment", + "vars": { + "test": "multiple_setups_first", + "auto_workload_path": "scale/MultipleSetups.yml", + "setup": "first", + }, }, - {'func': 'deploy cluster'}, - {'func': 'run test'}, - {'func': 'analyze'} + {"func": "deploy cluster"}, + {"func": "run test"}, + {"func": "analyze"}, ], - 'priority': 5 + "priority": 5, }, { - 'name': 'multiple_setups_second', - 'commands': [ + "name": "multiple_setups_second", + "commands": [ { - 'func': 'prepare environment', - 'vars': { - 'test': 'multiple_setups_second', - 'auto_workload_path': 'scale/MultipleSetups.yml', - 'setup': 'second' - } + "func": "prepare environment", + "vars": { + "test": "multiple_setups_second", + "auto_workload_path": "scale/MultipleSetups.yml", + "setup": "second", + }, }, - {'func': 'deploy cluster'}, - {'func': 'run test'}, - {'func': 'analyze'} + {"func": "deploy cluster"}, + {"func": "run test"}, + {"func": "analyze"}, ], - 'priority': 5 + "priority": 5, }, - - ], - 'timeout': 64800, + "timeout": 64800, } actual_json_str = construct_all_tasks_json() @@ -149,9 +144,9 @@ def test_construct_all_tasks_json_multiple_setups(self, mock_glob, mock_safe_loa self.assertDictEqual(expected_json, actual_json) - @patch('gennylib.genny_auto_tasks.open', new_callable=mock_open, read_data='') - @patch('glob.glob') - @patch('gennylib.genny_auto_tasks.modified_workload_files') + @patch("gennylib.genny_auto_tasks.open", new_callable=mock_open, read_data="") + @patch("glob.glob") + @patch("gennylib.genny_auto_tasks.modified_workload_files") def test_construct_variant_json(self, mock_glob, mock_modified_workload_files, mock_open): """ This test runs construct_variant_json with static workloads and variants @@ -159,26 +154,17 @@ def test_construct_variant_json(self, mock_glob, mock_modified_workload_files, m the generated json is what evergreen will expect to generate the correct variants. 
""" - mock_modified_workload_files.return_value = ["scale/NewWorkload.yml", "subdir1/subdir2/subdir3/NestedTest.yml", - "non-yaml-file.md"] - static_variants = ['variant-1', 'variant-2'] + mock_modified_workload_files.return_value = [ + "scale/NewWorkload.yml", + "subdir1/subdir2/subdir3/NestedTest.yml", + "non-yaml-file.md", + ] + static_variants = ["variant-1", "variant-2"] expected_json = { - 'buildvariants': [ - { - 'name': 'variant-1', - 'tasks': [ - {'name': 'new_workload'}, - {'name': 'nested_test'} - ] - }, - { - 'name': 'variant-2', - 'tasks': [ - {'name': 'new_workload'}, - {'name': 'nested_test'} - ] - } + "buildvariants": [ + {"name": "variant-1", "tasks": [{"name": "new_workload"}, {"name": "nested_test"}]}, + {"name": "variant-2", "tasks": [{"name": "new_workload"}, {"name": "nested_test"}]}, ] } @@ -188,43 +174,41 @@ def test_construct_variant_json(self, mock_glob, mock_modified_workload_files, m self.assertDictEqual(expected_json, actual_json) - @patch('gennylib.genny_auto_tasks.open', new_callable=mock_open, read_data='') - @patch('glob.glob') - @patch('yaml.safe_load') - @patch('gennylib.genny_auto_tasks.modified_workload_files') - def test_construct_variant_json_multiple_setups(self, mock_glob, mock_safe_load, mock_modified_workload_files, mock_open): + @patch("gennylib.genny_auto_tasks.open", new_callable=mock_open, read_data="") + @patch("glob.glob") + @patch("yaml.safe_load") + @patch("gennylib.genny_auto_tasks.modified_workload_files") + def test_construct_variant_json_multiple_setups( + self, mock_glob, mock_safe_load, mock_modified_workload_files, mock_open + ): """ This test runs construct_variant_json with static workloads and variants and checks that the generated json is what evergreen will expect to generate the correct variants. """ - static_variants = ['variant-1', 'variant-2'] + static_variants = ["variant-1", "variant-2"] mock_modified_workload_files.return_value = ["genny/src/workloads/scale/MultipleSetups.yml"] mock_safe_load.return_value = { - 'AutoRun': { - 'PrepareEnvironmentWith': { - 'setup': ['first', 'second'] - } - } + "AutoRun": {"PrepareEnvironmentWith": {"setup": ["first", "second"]}} } expected_json = { - 'buildvariants': [ + "buildvariants": [ { - 'name': 'variant-1', - 'tasks': [ - {'name': 'multiple_setups_first'}, - {'name': 'multiple_setups_second'}, - ] + "name": "variant-1", + "tasks": [ + {"name": "multiple_setups_first"}, + {"name": "multiple_setups_second"}, + ], }, { - 'name': 'variant-2', - 'tasks': [ - {'name': 'multiple_setups_first'}, - {'name': 'multiple_setups_second'}, - ] - } + "name": "variant-2", + "tasks": [ + {"name": "multiple_setups_first"}, + {"name": "multiple_setups_second"}, + ], + }, ] } @@ -234,7 +218,7 @@ def test_construct_variant_json_multiple_setups(self, mock_glob, mock_safe_load, self.assertDictEqual(expected_json, actual_json) - @patch('subprocess.check_output') + @patch("subprocess.check_output") def test_modified_workload_files(self, mock_check_output): """ The test calls modified_workload_files() with a stubbed version of subprocess.check_output returning static git results @@ -244,20 +228,14 @@ def test_modified_workload_files(self, mock_check_output): # Cases that should be executed successfully. 
cases = [ + (b"../workloads/sub/abc.yml\n", ["sub/abc.yml"]), ( - b'../workloads/sub/abc.yml\n', - ['sub/abc.yml'] - ), - ( - b'../workloads/sub1/foo.yml\n\ + b"../workloads/sub1/foo.yml\n\ ../workloads/sub2/bar.yml\n\ - ../workloads/a/very/very/nested/file.yml\n', - ['sub1/foo.yml', 'sub2/bar.yml', 'a/very/very/nested/file.yml'] - ), - ( - b'', - [] + ../workloads/a/very/very/nested/file.yml\n", + ["sub1/foo.yml", "sub2/bar.yml", "a/very/very/nested/file.yml"], ), + (b"", []), ] for tc in cases: @@ -267,32 +245,17 @@ def test_modified_workload_files(self, mock_check_output): self.assertEqual(expected_files, actual_files) # Check that we handle errors from subprocess.check_output properly. - mock_check_output.side_effect = CalledProcessError(127, 'cmd') + mock_check_output.side_effect = CalledProcessError(127, "cmd") with self.assertRaises(CalledProcessError) as cm: modified_workload_files() def test_validate_user_workloads(self): cases = [ - ( - ['scale/BigUpdate.yml'], - [] - ), - ( - ['scale/InsertBigDocs.yml', 'selftests/GennyOverhead.yml'], - [] - ), - ( - ['networking/NonExistent.yml'], - ['no file'] - ), - ( - ['scale/BigUpdate.yml', 'docs'], - ['no file'] - ), - ( - ['CMakeLists.txt'], - ['not a .yml'] - ), + (["scale/BigUpdate.yml"], []), + (["scale/InsertBigDocs.yml", "selftests/GennyOverhead.yml"], []), + (["networking/NonExistent.yml"], ["no file"]), + (["scale/BigUpdate.yml", "docs"], ["no file"]), + (["CMakeLists.txt"], ["not a .yml"]), ] for tc in cases: diff --git a/src/python/tests/cedar_report_test.py b/src/python/tests/cedar_report_test.py index ee7c76757f..564be991a1 100644 --- a/src/python/tests/cedar_report_test.py +++ b/src/python/tests/cedar_report_test.py @@ -19,159 +19,171 @@ def _fetch(url, output, **kwargs): class CedarReportTest(unittest.TestCase): - - @patch('gennylib.cedar_report.ShellCuratorRunner.run') + @patch("gennylib.cedar_report.ShellCuratorRunner.run") def test_cedar_report(self, mock_uploader_run): """ This test documents the environment variables needed to run cedar_report.py and checks that the environment variables are correctly used. """ + class MatchAnyString(object): def __eq__(self, other): return type(other) == str mock_env = { - 'task_name': 'my_task_name', - 'project': 'my_project', - 'version_id': 'my_version', - 'revision_order_id': '1', - 'build_variant': 'my_variant', - 'task_id': 'my_task_id', - 'execution': '1', - 'is_patch': 'true', # This should get converted to mainline = False in the report. - - 'test_name': 'my_test_name', - - 'perf_jira_user': 'my_username', - 'perf_jira_pw': 'my_password', - - 'terraform_key': 'my_aws_key', - 'terraform_secret': 'my_aws_secret' + "task_name": "my_task_name", + "project": "my_project", + "version_id": "my_version", + "revision_order_id": "1", + "build_variant": "my_variant", + "task_id": "my_task_id", + "execution": "1", + "is_patch": "true", # This should get converted to mainline = False in the report. 
+ "test_name": "my_test_name", + "perf_jira_user": "my_username", + "perf_jira_pw": "my_password", + "terraform_key": "my_aws_key", + "terraform_secret": "my_aws_secret", } expected_uploader_run_args = [ - 'curator', 'poplar', 'send', '--service', 'cedar.mongodb.com:7070', '--cert', - 'cedar.user.crt', '--key', 'cedar.user.key', '--ca', 'cedar.ca.pem', '--path', - 'cedar_report.json'] + "curator", + "poplar", + "send", + "--service", + "cedar.mongodb.com:7070", + "--cert", + "cedar.user.crt", + "--key", + "cedar.user.key", + "--ca", + "cedar.ca.pem", + "--path", + "cedar_report.json", + ] expected_json = { - 'project': 'my_project', - 'version': 'my_version', - 'order': 1, - 'variant': 'my_variant', - 'task_name': 'my_task_name', - 'task_id': 'my_task_id', - 'execution_number': 1, - 'mainline': False, - 'tests': [{ - 'info': { - 'test_name': 'my_task_name', - 'trial': 0, - 'tags': [], - 'args': {} - }, - 'created_at': _FIXED_DATETIME, - 'completed_at': _FIXED_DATETIME, - 'artifacts': [], - 'metrics': None, - 'sub_tests': [{ - 'info': { - 'test_name': 'HelloWorld-Greetings', - 'trial': 0, - 'tags': [], - 'args': {} - }, - 'created_at': MatchAnyString(), - 'completed_at': MatchAnyString(), - 'artifacts': [{ - 'bucket': 'genny-metrics', - 'path': 'HelloWorld-Greetings', - 'tags': [], - 'local_path': MatchAnyString(), - 'created_at': MatchAnyString(), - 'convert_bson_to_ftdc': True, - 'permissions': 'public-read', - 'prefix': 'my_task_id_1' - }], - 'metrics': None, - 'sub_tests': None - }, { - 'info': { - 'test_name': 'InsertRemove-Insert', - 'trial': 0, - 'tags': [], - 'args': {} - }, - 'created_at': MatchAnyString(), - 'completed_at': MatchAnyString(), - 'artifacts': [{ - 'bucket': 'genny-metrics', - 'path': 'InsertRemove-Insert', - 'tags': [], - 'local_path': MatchAnyString(), - 'created_at': MatchAnyString(), - 'convert_bson_to_ftdc': True, - 'permissions': 'public-read', - 'prefix': 'my_task_id_1' - }], - 'metrics': None, - 'sub_tests': None - }, { - 'info': { - 'test_name': 'InsertRemove-Remove', - 'trial': 0, - 'tags': [], - 'args': {} - }, - 'created_at': MatchAnyString(), - 'completed_at': MatchAnyString(), - 'artifacts': [{ - 'bucket': 'genny-metrics', - 'path': 'InsertRemove-Remove', - 'tags': [], - 'local_path': MatchAnyString(), - 'created_at': MatchAnyString(), - 'convert_bson_to_ftdc': True, - 'permissions': 'public-read', - 'prefix': 'my_task_id_1' - }], - 'metrics': None, - 'sub_tests': None - }] - }], - 'bucket': { - 'api_key': 'my_aws_key', - 'api_secret': 'my_aws_secret', - 'api_token': None, - 'region': 'us-east-1', - 'name': 'genny-metrics', - } + "project": "my_project", + "version": "my_version", + "order": 1, + "variant": "my_variant", + "task_name": "my_task_name", + "task_id": "my_task_id", + "execution_number": 1, + "mainline": False, + "tests": [ + { + "info": {"test_name": "my_task_name", "trial": 0, "tags": [], "args": {}}, + "created_at": _FIXED_DATETIME, + "completed_at": _FIXED_DATETIME, + "artifacts": [], + "metrics": None, + "sub_tests": [ + { + "info": { + "test_name": "HelloWorld-Greetings", + "trial": 0, + "tags": [], + "args": {}, + }, + "created_at": MatchAnyString(), + "completed_at": MatchAnyString(), + "artifacts": [ + { + "bucket": "genny-metrics", + "path": "HelloWorld-Greetings", + "tags": [], + "local_path": MatchAnyString(), + "created_at": MatchAnyString(), + "convert_bson_to_ftdc": True, + "permissions": "public-read", + "prefix": "my_task_id_1", + } + ], + "metrics": None, + "sub_tests": None, + }, + { + "info": { + "test_name": 
"InsertRemove-Insert", + "trial": 0, + "tags": [], + "args": {}, + }, + "created_at": MatchAnyString(), + "completed_at": MatchAnyString(), + "artifacts": [ + { + "bucket": "genny-metrics", + "path": "InsertRemove-Insert", + "tags": [], + "local_path": MatchAnyString(), + "created_at": MatchAnyString(), + "convert_bson_to_ftdc": True, + "permissions": "public-read", + "prefix": "my_task_id_1", + } + ], + "metrics": None, + "sub_tests": None, + }, + { + "info": { + "test_name": "InsertRemove-Remove", + "trial": 0, + "tags": [], + "args": {}, + }, + "created_at": MatchAnyString(), + "completed_at": MatchAnyString(), + "artifacts": [ + { + "bucket": "genny-metrics", + "path": "InsertRemove-Remove", + "tags": [], + "local_path": MatchAnyString(), + "created_at": MatchAnyString(), + "convert_bson_to_ftdc": True, + "permissions": "public-read", + "prefix": "my_task_id_1", + } + ], + "metrics": None, + "sub_tests": None, + }, + ], + } + ], + "bucket": { + "api_key": "my_aws_key", + "api_secret": "my_aws_secret", + "api_token": None, + "region": "us-east-1", + "name": "genny-metrics", + }, } with tempfile.TemporaryDirectory() as output_dir: - argv = [get_fixture('cedar', 'shared_with_cxx_metrics_test.csv'), output_dir] + argv = [get_fixture("cedar", "shared_with_cxx_metrics_test.csv"), output_dir] main__cedar_report(argv, mock_env, _NoopCertRetriever) - with open('cedar_report.json') as f: + with open("cedar_report.json") as f: report_json = json.load(f) # Verify just the top-level date fields for correct formatting. - report_json['tests'][0]['created_at'] = _FIXED_DATETIME - report_json['tests'][0]['completed_at'] = _FIXED_DATETIME + report_json["tests"][0]["created_at"] = _FIXED_DATETIME + report_json["tests"][0]["completed_at"] = _FIXED_DATETIME self.assertDictEqual(expected_json, report_json) mock_uploader_run.assert_called_with(expected_uploader_run_args) - @patch('gennylib.cedar_report.ShellCuratorRunner.run') + @patch("gennylib.cedar_report.ShellCuratorRunner.run") def test_cedar_mode_skip(self, mock_uploader_run): - mock_env = { - 'cedar_mode': 'skip', - } + mock_env = {"cedar_mode": "skip"} with tempfile.TemporaryDirectory() as output_dir: - argv = [get_fixture('cedar', 'shared_with_cxx_metrics_test.csv'), output_dir] + argv = [get_fixture("cedar", "shared_with_cxx_metrics_test.csv"), output_dir] main__cedar_report(argv, mock_env, _NoopCertRetriever) mock_uploader_run.assert_not_called() - diff --git a/src/python/tests/cedar_test.py b/src/python/tests/cedar_test.py index 94379e52dc..39ea9e2faf 100644 --- a/src/python/tests/cedar_test.py +++ b/src/python/tests/cedar_test.py @@ -26,26 +26,24 @@ def get_fixture(*csv_file_path): - return pjoin('tests', 'fixtures', *csv_file_path) + return pjoin("tests", "fixtures", *csv_file_path) class CedarTest(unittest.TestCase): - def test_split_csv2(self): large_precise_float = 10 ** 15 num_cols = len(gennylib.parsers.csv2.IntermediateCSVColumns.default_columns()) mock_data_reader = [ - (['first' for _ in range(num_cols)], 'a1'), - ([1 for _ in range(num_cols)], 'a1'), + (["first" for _ in range(num_cols)], "a1"), + ([1 for _ in range(num_cols)], "a1"), # Store a large number to make sure precision is not lost. Python csv converts # numbers to floats by default, which has 2^53 or 10^15 precision. Unix time in # milliseconds is currently 10^13. 
- ([large_precise_float for _ in range(num_cols)], 'a2') - + ([large_precise_float for _ in range(num_cols)], "a2"), ] with tempfile.TemporaryDirectory() as output_dir: output_files = cedar.split_into_actor_csv_files(mock_data_reader, output_dir) - self.assertEqual(output_files, ['a1.csv', 'a2.csv']) + self.assertEqual(output_files, ["a1.csv", "a2.csv"]) a1o1 = pjoin(output_dir, output_files[0]) self.assertTrue(os.path.isfile(a1o1)) @@ -72,7 +70,7 @@ def verify_output(self, bson_metrics_file_name, expected_results, check_last_row cumulative, this likely means previous rows are all correct as well. :return: """ - with open(bson_metrics_file_name, 'rb') as f: + with open(bson_metrics_file_name, "rb") as f: options = CodecOptions(document_class=OrderedDict) index = 0 if check_last_row_only: @@ -84,161 +82,113 @@ def verify_output(self, bson_metrics_file_name, expected_results, check_last_row index += 1 def test_cedar_main(self): - expected_result_insert = OrderedDict([ - ('ts', datetime(1970, 1, 1, 2, 46, 40)), - ('id', 0), - ('counters', OrderedDict([ - ('n', 9), - ('ops', 58), - ('size', 350), - ('errors', 23) - ])), - ('timers', OrderedDict([ - ('duration', 1320), - ('total', 1518) - ])), - ('gauges', OrderedDict([('workers', 5)])) - ]) - - expected_result_remove = OrderedDict([ - ('ts', datetime(1970, 1, 1, 2, 46, 40)), - ('id', 0), - ('counters', OrderedDict([ - ('n', 9), - ('ops', 58), - ('size', 257), - ('errors', 25) - ])), - ('timers', OrderedDict([ - ('duration', 1392), - ('total', 1598) - ])), - ('gauges', OrderedDict([('workers', 5)])) - ]) + expected_result_insert = OrderedDict( + [ + ("ts", datetime(1970, 1, 1, 2, 46, 40)), + ("id", 0), + ("counters", OrderedDict([("n", 9), ("ops", 58), ("size", 350), ("errors", 23)])), + ("timers", OrderedDict([("duration", 1320), ("total", 1518)])), + ("gauges", OrderedDict([("workers", 5)])), + ] + ) + + expected_result_remove = OrderedDict( + [ + ("ts", datetime(1970, 1, 1, 2, 46, 40)), + ("id", 0), + ("counters", OrderedDict([("n", 9), ("ops", 58), ("size", 257), ("errors", 25)])), + ("timers", OrderedDict([("duration", 1392), ("total", 1598)])), + ("gauges", OrderedDict([("workers", 5)])), + ] + ) with tempfile.TemporaryDirectory() as output_dir: - args = [ - get_fixture('cedar', 'two_op.csv'), - output_dir - ] + args = [get_fixture("cedar", "two_op.csv"), output_dir] cedar.main__cedar(args) self.verify_output( - pjoin(output_dir, 'InsertRemove-Insert.bson'), + pjoin(output_dir, "InsertRemove-Insert.bson"), expected_result_insert, - check_last_row_only=True + check_last_row_only=True, ) self.verify_output( - pjoin(output_dir, 'InsertRemove-Remove.bson'), + pjoin(output_dir, "InsertRemove-Remove.bson"), expected_result_remove, - check_last_row_only=True + check_last_row_only=True, ) def test_cedar_main_2(self): - expected_result_greetings = OrderedDict([ - # The operation duration can be ignored because they're a few ns. - ('ts', datetime.utcfromtimestamp(42 / 1000)), - ('id', 3), - ('counters', OrderedDict([ - ('n', 2), - ('ops', 0), - ('size', 0), - ('errors', 0) - ])), - ('timers', OrderedDict([ - ('duration', 13), - ('total', 13) - ])), - ('gauges', OrderedDict([('workers', 1)])) - ]) + expected_result_greetings = OrderedDict( + [ + # The operation duration can be ignored because they're a few ns. 
+ ("ts", datetime.utcfromtimestamp(42 / 1000)), + ("id", 3), + ("counters", OrderedDict([("n", 2), ("ops", 0), ("size", 0), ("errors", 0)])), + ("timers", OrderedDict([("duration", 13), ("total", 13)])), + ("gauges", OrderedDict([("workers", 1)])), + ] + ) expected_result_insert = [ - OrderedDict([ - ('ts', datetime.utcfromtimestamp(42 / 1000)), - ('id', 1), - ('counters', OrderedDict([ - ('n', 1), - ('ops', 9), - ('size', 300), - ('errors', 0) - ])), - ('timers', OrderedDict([ - ('duration', 23), - ('total', 23) - ])), - ('gauges', OrderedDict([('workers', 2)])) - ]), - OrderedDict([ - ('ts', datetime.utcfromtimestamp(42 / 1000)), - ('id', 2), - ('counters', OrderedDict([ - ('n', 2), - ('ops', 17), - ('size', 500), - ('errors', 0) - ])), - ('timers', OrderedDict([ - ('duration', 43), - ('total', 43) - ])), - ('gauges', OrderedDict([('workers', 2)])) - ]), + OrderedDict( + [ + ("ts", datetime.utcfromtimestamp(42 / 1000)), + ("id", 1), + ("counters", OrderedDict([("n", 1), ("ops", 9), ("size", 300), ("errors", 0)])), + ("timers", OrderedDict([("duration", 23), ("total", 23)])), + ("gauges", OrderedDict([("workers", 2)])), + ] + ), + OrderedDict( + [ + ("ts", datetime.utcfromtimestamp(42 / 1000)), + ("id", 2), + ( + "counters", + OrderedDict([("n", 2), ("ops", 17), ("size", 500), ("errors", 0)]), + ), + ("timers", OrderedDict([("duration", 43), ("total", 43)])), + ("gauges", OrderedDict([("workers", 2)])), + ] + ), ] expected_result_remove = [ - OrderedDict([ - ('ts', datetime.utcfromtimestamp(42 / 1000)), - ('id', 2), - ('counters', OrderedDict([ - ('n', 1), - ('ops', 7), - ('size', 30), - ('errors', 0) - ])), - ('timers', OrderedDict([ - ('duration', 10), - ('total', 10) - ])), - ('gauges', OrderedDict([('workers', 2)])) - ]), - OrderedDict([ - ('ts', datetime.utcfromtimestamp(42 / 1000)), - ('id', 1), - ('counters', OrderedDict([ - ('n', 2), - ('ops', 13), - ('size', 70), - ('errors', 0) - ])), - ('timers', OrderedDict([ - ('duration', 27), - ('total', 27) - ])), - ('gauges', OrderedDict([('workers', 2)])) - ]), + OrderedDict( + [ + ("ts", datetime.utcfromtimestamp(42 / 1000)), + ("id", 2), + ("counters", OrderedDict([("n", 1), ("ops", 7), ("size", 30), ("errors", 0)])), + ("timers", OrderedDict([("duration", 10), ("total", 10)])), + ("gauges", OrderedDict([("workers", 2)])), + ] + ), + OrderedDict( + [ + ("ts", datetime.utcfromtimestamp(42 / 1000)), + ("id", 1), + ("counters", OrderedDict([("n", 2), ("ops", 13), ("size", 70), ("errors", 0)])), + ("timers", OrderedDict([("duration", 27), ("total", 27)])), + ("gauges", OrderedDict([("workers", 2)])), + ] + ), ] with tempfile.TemporaryDirectory() as output_dir: - args = [ - get_fixture('cedar', 'shared_with_cxx_metrics_test.csv'), - output_dir - ] + args = [get_fixture("cedar", "shared_with_cxx_metrics_test.csv"), output_dir] cedar.main__cedar(args) self.verify_output( - pjoin(output_dir, 'HelloWorld-Greetings.bson'), - [expected_result_greetings], + pjoin(output_dir, "HelloWorld-Greetings.bson"), [expected_result_greetings] ) self.verify_output( - pjoin(output_dir, 'InsertRemove-Insert.bson'), - expected_result_insert, + pjoin(output_dir, "InsertRemove-Insert.bson"), expected_result_insert ) self.verify_output( - pjoin(output_dir, 'InsertRemove-Remove.bson'), - expected_result_remove, + pjoin(output_dir, "InsertRemove-Remove.bson"), expected_result_remove ) diff --git a/src/python/tests/csv2_test.py b/src/python/tests/csv2_test.py index 29dbd87ef2..6a32db582b 100644 --- a/src/python/tests/csv2_test.py +++ b/src/python/tests/csv2_test.py @@ 
-5,34 +5,34 @@ class CSV2Test(unittest.TestCase): - @staticmethod def get_fixture(*file_path): - return os.path.join('tests', 'fixtures', 'cedar', *file_path) + return os.path.join("tests", "fixtures", "cedar", *file_path) def test_basic_parsing(self): - test_csv = csv2.CSV2(self.get_fixture('barebones.csv')) + test_csv = csv2.CSV2(self.get_fixture("barebones.csv")) with test_csv.data_reader() as _: self.assertEqual(test_csv._unix_epoch_offset_ns, 90 * (10 ** 9)) op_map = test_csv._operation_thread_count_map - self.assertDictEqual(op_map, {('MyActor', 'MyOperation'): 2}) + self.assertDictEqual(op_map, {("MyActor", "MyOperation"): 2}) def test_data_reader(self): - test_csv = csv2.CSV2(self.get_fixture('barebones.csv')) + test_csv = csv2.CSV2(self.get_fixture("barebones.csv")) with test_csv.data_reader() as dr: - self.assertEqual(next(dr), - ([102345.0, 12345000000, 0, 'MyOperation', 100, 0, 1, 6, 2, 40, 2], - 'MyActor')) + self.assertEqual( + next(dr), + ([102345.0, 12345000000, 0, "MyOperation", 100, 0, 1, 6, 2, 40, 2], "MyActor"), + ) def test_error_outcome(self): - test_csv = csv2.CSV2(self.get_fixture('error_outcome.csv')) + test_csv = csv2.CSV2(self.get_fixture("error_outcome.csv")) with test_csv.data_reader() as dr: next(dr) - with self.assertRaisesRegex(csv2.CSV2ParsingError, 'Unexpected outcome on line'): + with self.assertRaisesRegex(csv2.CSV2ParsingError, "Unexpected outcome on line"): next(dr) next(dr) def test_missing_clock_header(self): - with self.assertRaisesRegex(csv2.CSV2ParsingError, 'Unknown csv2 section title'): - with csv2.CSV2(self.get_fixture('invalid_title.csv')).data_reader() as _: + with self.assertRaisesRegex(csv2.CSV2ParsingError, "Unknown csv2 section title"): + with csv2.CSV2(self.get_fixture("invalid_title.csv")).data_reader() as _: pass diff --git a/src/python/tests/fixtures/auto_tasks_fixtures.py b/src/python/tests/fixtures/auto_tasks_fixtures.py index f41261c724..7614d63f4c 100644 --- a/src/python/tests/fixtures/auto_tasks_fixtures.py +++ b/src/python/tests/fixtures/auto_tasks_fixtures.py @@ -1,303 +1,187 @@ - # 3-Tuples of (workload_dict, env_dict, expected) workload_should_autorun_cases = [ - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'], - 'storageEngine': ['wiredTiger'], - 'build_variant': ['variant1'], - 'is_patch': ['true'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger', - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'] - } - } - }, - { - 'expansions': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger', - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': {} - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': {} - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['osx', 'windows', 'linux'], - 'build_variant': ['variant1'], - 'is_patch': ['true'] - } - } 
- }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - True - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['osx', 'windows', 'debian'], - 'build_variant': ['variant1'], - 'is_patch': ['true'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['osx', 'windows', 'linux'], - 'build_variant': ['variant1'], - 'is_patch': ['false'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'], - 'storageEngine': ['wiredTiger'], - 'build_variant': ['variant1'], - 'is_patch': ['true'] - } - } - }, - { - 'bootstrap': { - 'platform': 'osx', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'], - 'storageEngine': ['wiredTiger'], - 'build_variant': ['variant1'], - 'is_patch': ['true'], - 'key': ['value'] - } - } - }, - { - 'bootstrap': { - 'platform': 'osx', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'], - 'storageEngine': ['other'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - {}, - { - 'bootstrap': { - 'platform': ['linux'], - 'storageEngine': ['wiredTiger'] - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': {} - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': 'not-a-dict' - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ( - { - 'AutoRun': { - 'Requires': { - 'platform': ['linux'], - 'storageEngine': ['wiredTiger'], - 'runtime': ['string-runtime'] - } - } - }, - { - 'bootstrap': { - 'platform': 'linux', - 'storageEngine': 'wiredTiger' - }, - 'runtime': { - 'build_variant': 'variant1', - 'is_patch': 'true' - } - }, - False - ), - ] + ( + { + "AutoRun": { + "Requires": { + "platform": ["linux"], + "storageEngine": ["wiredTiger"], + "build_variant": ["variant1"], + "is_patch": ["true"], + } + } + }, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + True, + ), + ( + {"AutoRun": {"Requires": {"platform": ["linux"]}}}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + True, + ), + ( + {"AutoRun": {"Requires": {"platform": ["linux"]}}}, + { + "expansions": { + "platform": "linux", + "storageEngine": "wiredTiger", + "build_variant": "variant1", + "is_patch": "true", + } + }, + True, + ), + ( + {"AutoRun": {"Requires": {}}}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, 
+ "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + True, + ), + ( + {"AutoRun": {"Requires": {}}}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + True, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["osx", "windows", "linux"], + "build_variant": ["variant1"], + "is_patch": ["true"], + } + } + }, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + True, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["osx", "windows", "debian"], + "build_variant": ["variant1"], + "is_patch": ["true"], + } + } + }, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["osx", "windows", "linux"], + "build_variant": ["variant1"], + "is_patch": ["false"], + } + } + }, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["linux"], + "storageEngine": ["wiredTiger"], + "build_variant": ["variant1"], + "is_patch": ["true"], + } + } + }, + { + "bootstrap": {"platform": "osx", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["linux"], + "storageEngine": ["wiredTiger"], + "build_variant": ["variant1"], + "is_patch": ["true"], + "key": ["value"], + } + } + }, + { + "bootstrap": {"platform": "osx", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + {"AutoRun": {"Requires": {"platform": ["linux"], "storageEngine": ["other"]}}}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + {}, + { + "bootstrap": {"platform": ["linux"], "storageEngine": ["wiredTiger"]}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + {"AutoRun": {}}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + {"AutoRun": "not-a-dict"}, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), + ( + { + "AutoRun": { + "Requires": { + "platform": ["linux"], + "storageEngine": ["wiredTiger"], + "runtime": ["string-runtime"], + } + } + }, + { + "bootstrap": {"platform": "linux", "storageEngine": "wiredTiger"}, + "runtime": {"build_variant": "variant1", "is_patch": "true"}, + }, + False, + ), +] diff --git a/src/python/tests/legacy_report_test.py b/src/python/tests/legacy_report_test.py index 4802a9d275..4b78d0010d 100644 --- a/src/python/tests/legacy_report_test.py +++ b/src/python/tests/legacy_report_test.py @@ -7,52 +7,52 @@ class LegacyReportTest(unittest.TestCase): - @staticmethod def get_fixture(*file_path): - return os.path.join('tests', 'fixtures', 'cedar', *file_path) + return os.path.join("tests", "fixtures", "cedar", *file_path) def test_legacy_report(self): expected_json = { - 'results': [{ - 'name': 'HelloWorld-Greetings', - 'workload': 'HelloWorld-Greetings', - 'start': 
419.99968, - 'end': 419.99981, - 'results': { - '1': { - 'ops_per_sec': 76923076.92307693, - 'ops_per_sec_values': [76923076.92307693] - } - } - }, { - 'name': 'InsertRemove-Remove', - 'workload': 'InsertRemove-Remove', - 'start': 419.99983, - 'end': 420.0, - 'results': { - '2': { - 'ops_per_sec': 117647058.8235294, - 'ops_per_sec_values': [117647058.8235294] - } - } - }, { - 'name': 'InsertRemove-Insert', - 'workload': 'InsertRemove-Insert', - 'start': 419.9996, - 'end': 419.99985, - 'results': { - '2': { - 'ops_per_sec': 80000000.0, - 'ops_per_sec_values': [80000000.0] - } - } - }] + "results": [ + { + "name": "HelloWorld-Greetings", + "workload": "HelloWorld-Greetings", + "start": 419.99968, + "end": 419.99981, + "results": { + "1": { + "ops_per_sec": 76923076.92307693, + "ops_per_sec_values": [76923076.92307693], + } + }, + }, + { + "name": "InsertRemove-Remove", + "workload": "InsertRemove-Remove", + "start": 419.99983, + "end": 420.0, + "results": { + "2": { + "ops_per_sec": 117647058.8235294, + "ops_per_sec_values": [117647058.8235294], + } + }, + }, + { + "name": "InsertRemove-Insert", + "workload": "InsertRemove-Insert", + "start": 419.9996, + "end": 419.99985, + "results": { + "2": {"ops_per_sec": 80000000.0, "ops_per_sec_values": [80000000.0]} + }, + }, + ] } with tempfile.TemporaryDirectory() as output_dir: - out_file = os.path.join(output_dir, 'perf.json') - argv = [self.get_fixture('shared_with_cxx_metrics_test.csv'), '--report-file', out_file] + out_file = os.path.join(output_dir, "perf.json") + argv = [self.get_fixture("shared_with_cxx_metrics_test.csv"), "--report-file", out_file] main__legacy_report(argv) with open(out_file) as f: diff --git a/src/python/third_party/csvsort.py b/src/python/third_party/csvsort.py index f955502882..af40cd3931 100644 --- a/src/python/third_party/csvsort.py +++ b/src/python/third_party/csvsort.py @@ -22,14 +22,16 @@ class CsvSortError(Exception): pass -def csvsort(input_filename, - columns, - output_filename=None, - max_size=100, - has_header=True, - delimiter=',', - show_progress=False, - quoting=csv.QUOTE_MINIMAL): +def csvsort( + input_filename, + columns, + output_filename=None, + max_size=100, + has_header=True, + delimiter=",", + show_progress=False, + quoting=csv.QUOTE_MINIMAL, +): """Sort the CSV file on disk rather than in memory. 
The merge sort algorithm is used to break the file into smaller sub files @@ -62,14 +64,14 @@ def csvsort(input_filename, filenames = csvsplit(reader, max_size, quoting) if show_progress: - print('Merging %d splits' % len(filenames)) + print("Merging %d splits" % len(filenames)) for filename in filenames: memorysort(filename, columns, quoting) sorted_filename = mergesort(filenames, columns, quoting) # XXX make more efficient by passing quoting, delimiter, and moving result # generate the final output file - with open(output_filename or input_filename, 'w') as output_fp: + with open(output_filename or input_filename, "w") as output_fp: writer = csv.writer(output_fp, delimiter=delimiter, quoting=quoting) if header: writer.writerow(header) @@ -87,20 +89,18 @@ def parse_columns(columns, header): if isinstance(column, int): if header: if column >= len(header): - raise CsvSortError( - 'Column index is out of range: "{}"'.format(column)) + raise CsvSortError('Column index is out of range: "{}"'.format(column)) else: # find index of column from header if header is None: raise CsvSortError( - 'CSV needs a header to find index of this column name:' + - ' "{}"'.format(column)) + "CSV needs a header to find index of this column name:" + ' "{}"'.format(column) + ) else: if column in header: columns[i] = header.index(column) else: - raise CsvSortError( - 'Column name is not in header: "{}"'.format(column)) + raise CsvSortError('Column name is not in header: "{}"'.format(column)) return columns @@ -115,7 +115,7 @@ def csvsplit(reader, max_size, quoting): # break CSV file into smaller merge files for row in reader: if writer is None: - ntf = tempfile.NamedTemporaryFile(delete=False, mode='w') + ntf = tempfile.NamedTemporaryFile(delete=False, mode="w") writer = csv.writer(ntf, quoting=quoting) split_filenames.append(ntf.name) @@ -133,7 +133,7 @@ def memorysort(filename, columns, quoting): with open(filename) as input_fp: rows = [row for row in csv.reader(input_fp, quoting=quoting)] rows.sort(key=lambda row: get_key(row, columns)) - with open(filename, 'w') as output_fp: + with open(filename, "w") as output_fp: writer = csv.writer(output_fp, quoting=quoting) for row in rows: writer.writerow(row) @@ -161,14 +161,14 @@ def mergesort(sorted_filenames, columns, quoting, nway=100): """ merge_n = 0 while len(sorted_filenames) > 1: - merge_filenames, sorted_filenames = \ - sorted_filenames[:nway], sorted_filenames[nway:] + merge_filenames, sorted_filenames = sorted_filenames[:nway], sorted_filenames[nway:] - with tempfile.NamedTemporaryFile(delete=False, mode='w') as output_fp: + with tempfile.NamedTemporaryFile(delete=False, mode="w") as output_fp: writer = csv.writer(output_fp, quoting=quoting) merge_n += 1 - for _, row in heapq.merge(*[decorated_csv(filename, columns, quoting) - for filename in merge_filenames]): + for _, row in heapq.merge( + *[decorated_csv(filename, columns, quoting) for filename in merge_filenames] + ): writer.writerow(row) sorted_filenames.append(output_fp.name) @@ -181,48 +181,43 @@ def mergesort(sorted_filenames, columns, quoting, nway=100): def main(): parser = OptionParser() parser.add_option( - '-c', - '--column', - dest='columns', - action='append', - help='column of CSV to sort on') + "-c", "--column", dest="columns", action="append", help="column of CSV to sort on" + ) parser.add_option( - '-s', - '--size', - dest='max_size', - type='float', + "-s", + "--size", + dest="max_size", + type="float", default=100, - help='maximum size of each split CSV file in MB (default 100)') + 
help="maximum size of each split CSV file in MB (default 100)", + ) parser.add_option( - '-n', - '--no-header', - dest='has_header', - action='store_false', + "-n", + "--no-header", + dest="has_header", + action="store_false", default=True, - help='set CSV file has no header') - parser.add_option( - '-d', - '--delimiter', - default=',', - help='set CSV delimiter (default ",")') + help="set CSV file has no header", + ) + parser.add_option("-d", "--delimiter", default=",", help='set CSV delimiter (default ",")') args, input_files = parser.parse_args() if not input_files: - parser.error('What CSV file should be sorted?') + parser.error("What CSV file should be sorted?") elif not args.columns: - parser.error('Which columns should be sorted on?') + parser.error("Which columns should be sorted on?") else: # escape backslashes - args.delimiter = args.delimiter.decode('string_escape') - args.columns = [int(column) if column.isdigit() else column - for column in args.columns] + args.delimiter = args.delimiter.decode("string_escape") + args.columns = [int(column) if column.isdigit() else column for column in args.columns] csvsort( input_files[0], columns=args.columns, max_size=args.max_size, has_header=args.has_header, - delimiter=args.delimiter) + delimiter=args.delimiter, + ) -if __name__ == '__main__': +if __name__ == "__main__": main()