From a0e7bfdc244160db989f64c5d71a62aae3f13711 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 10 Mar 2025 00:52:56 +0100 Subject: [PATCH 01/21] [docs] Add basic configuration with Sphinx --- docs/Makefile | 20 ++++++ docs/source/api/modules.rst | 7 ++ docs/source/api/sebs.aws.rst | 61 ++++++++++++++++ docs/source/api/sebs.azure.rst | 61 ++++++++++++++++ docs/source/api/sebs.experiments.rst | 85 +++++++++++++++++++++++ docs/source/api/sebs.faas.rst | 53 ++++++++++++++ docs/source/api/sebs.gcp.rst | 53 ++++++++++++++ docs/source/api/sebs.local.rst | 61 ++++++++++++++++ docs/source/api/sebs.openwhisk.rst | 61 ++++++++++++++++ docs/source/api/sebs.rst | 100 +++++++++++++++++++++++++++ docs/source/api/sebs.storage.rst | 29 ++++++++ docs/source/conf.py | 35 ++++++++++ docs/source/index.rst | 18 +++++ 13 files changed, 644 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/source/api/modules.rst create mode 100644 docs/source/api/sebs.aws.rst create mode 100644 docs/source/api/sebs.azure.rst create mode 100644 docs/source/api/sebs.experiments.rst create mode 100644 docs/source/api/sebs.faas.rst create mode 100644 docs/source/api/sebs.gcp.rst create mode 100644 docs/source/api/sebs.local.rst create mode 100644 docs/source/api/sebs.openwhisk.rst create mode 100644 docs/source/api/sebs.rst create mode 100644 docs/source/api/sebs.storage.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/api/modules.rst b/docs/source/api/modules.rst new file mode 100644 index 00000000..d4bd9f34 --- /dev/null +++ b/docs/source/api/modules.rst @@ -0,0 +1,7 @@ +sebs +==== + +.. toctree:: + :maxdepth: 4 + + sebs diff --git a/docs/source/api/sebs.aws.rst b/docs/source/api/sebs.aws.rst new file mode 100644 index 00000000..23b3df24 --- /dev/null +++ b/docs/source/api/sebs.aws.rst @@ -0,0 +1,61 @@ +sebs.aws package +================ + +Submodules +---------- + +sebs.aws.aws module +------------------- + +.. automodule:: sebs.aws.aws + :members: + :undoc-members: + :show-inheritance: + +sebs.aws.config module +---------------------- + +.. automodule:: sebs.aws.config + :members: + :undoc-members: + :show-inheritance: + +sebs.aws.container module +------------------------- + +.. automodule:: sebs.aws.container + :members: + :undoc-members: + :show-inheritance: + +sebs.aws.function module +------------------------ + +.. automodule:: sebs.aws.function + :members: + :undoc-members: + :show-inheritance: + +sebs.aws.s3 module +------------------ + +.. automodule:: sebs.aws.s3 + :members: + :undoc-members: + :show-inheritance: + +sebs.aws.triggers module +------------------------ + +.. 
automodule:: sebs.aws.triggers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.aws + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.azure.rst b/docs/source/api/sebs.azure.rst new file mode 100644 index 00000000..d172aabf --- /dev/null +++ b/docs/source/api/sebs.azure.rst @@ -0,0 +1,61 @@ +sebs.azure package +================== + +Submodules +---------- + +sebs.azure.azure module +----------------------- + +.. automodule:: sebs.azure.azure + :members: + :undoc-members: + :show-inheritance: + +sebs.azure.blob\_storage module +------------------------------- + +.. automodule:: sebs.azure.blob_storage + :members: + :undoc-members: + :show-inheritance: + +sebs.azure.cli module +--------------------- + +.. automodule:: sebs.azure.cli + :members: + :undoc-members: + :show-inheritance: + +sebs.azure.config module +------------------------ + +.. automodule:: sebs.azure.config + :members: + :undoc-members: + :show-inheritance: + +sebs.azure.function module +-------------------------- + +.. automodule:: sebs.azure.function + :members: + :undoc-members: + :show-inheritance: + +sebs.azure.triggers module +-------------------------- + +.. automodule:: sebs.azure.triggers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.azure + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.experiments.rst b/docs/source/api/sebs.experiments.rst new file mode 100644 index 00000000..96e753e6 --- /dev/null +++ b/docs/source/api/sebs.experiments.rst @@ -0,0 +1,85 @@ +sebs.experiments package +======================== + +Submodules +---------- + +sebs.experiments.config module +------------------------------ + +.. automodule:: sebs.experiments.config + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.environment module +----------------------------------- + +.. automodule:: sebs.experiments.environment + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.eviction\_model module +--------------------------------------- + +.. automodule:: sebs.experiments.eviction_model + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.experiment module +---------------------------------- + +.. automodule:: sebs.experiments.experiment + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.invocation\_overhead module +-------------------------------------------- + +.. automodule:: sebs.experiments.invocation_overhead + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.network\_ping\_pong module +------------------------------------------- + +.. automodule:: sebs.experiments.network_ping_pong + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.perf\_cost module +---------------------------------- + +.. automodule:: sebs.experiments.perf_cost + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.result module +------------------------------ + +.. automodule:: sebs.experiments.result + :members: + :undoc-members: + :show-inheritance: + +sebs.experiments.startup\_time module +------------------------------------- + +.. automodule:: sebs.experiments.startup_time + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: sebs.experiments + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.faas.rst b/docs/source/api/sebs.faas.rst new file mode 100644 index 00000000..56258845 --- /dev/null +++ b/docs/source/api/sebs.faas.rst @@ -0,0 +1,53 @@ +sebs.faas package +================= + +Submodules +---------- + +sebs.faas.config module +----------------------- + +.. automodule:: sebs.faas.config + :members: + :undoc-members: + :show-inheritance: + +sebs.faas.container module +-------------------------- + +.. automodule:: sebs.faas.container + :members: + :undoc-members: + :show-inheritance: + +sebs.faas.function module +------------------------- + +.. automodule:: sebs.faas.function + :members: + :undoc-members: + :show-inheritance: + +sebs.faas.storage module +------------------------ + +.. automodule:: sebs.faas.storage + :members: + :undoc-members: + :show-inheritance: + +sebs.faas.system module +----------------------- + +.. automodule:: sebs.faas.system + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.faas + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.gcp.rst b/docs/source/api/sebs.gcp.rst new file mode 100644 index 00000000..ffcd9920 --- /dev/null +++ b/docs/source/api/sebs.gcp.rst @@ -0,0 +1,53 @@ +sebs.gcp package +================ + +Submodules +---------- + +sebs.gcp.config module +---------------------- + +.. automodule:: sebs.gcp.config + :members: + :undoc-members: + :show-inheritance: + +sebs.gcp.function module +------------------------ + +.. automodule:: sebs.gcp.function + :members: + :undoc-members: + :show-inheritance: + +sebs.gcp.gcp module +------------------- + +.. automodule:: sebs.gcp.gcp + :members: + :undoc-members: + :show-inheritance: + +sebs.gcp.storage module +----------------------- + +.. automodule:: sebs.gcp.storage + :members: + :undoc-members: + :show-inheritance: + +sebs.gcp.triggers module +------------------------ + +.. automodule:: sebs.gcp.triggers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.gcp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.local.rst b/docs/source/api/sebs.local.rst new file mode 100644 index 00000000..32a34143 --- /dev/null +++ b/docs/source/api/sebs.local.rst @@ -0,0 +1,61 @@ +sebs.local package +================== + +Submodules +---------- + +sebs.local.config module +------------------------ + +.. automodule:: sebs.local.config + :members: + :undoc-members: + :show-inheritance: + +sebs.local.deployment module +---------------------------- + +.. automodule:: sebs.local.deployment + :members: + :undoc-members: + :show-inheritance: + +sebs.local.function module +-------------------------- + +.. automodule:: sebs.local.function + :members: + :undoc-members: + :show-inheritance: + +sebs.local.local module +----------------------- + +.. automodule:: sebs.local.local + :members: + :undoc-members: + :show-inheritance: + +sebs.local.measureMem module +---------------------------- + +.. automodule:: sebs.local.measureMem + :members: + :undoc-members: + :show-inheritance: + +sebs.local.storage module +------------------------- + +.. automodule:: sebs.local.storage + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: sebs.local + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.openwhisk.rst b/docs/source/api/sebs.openwhisk.rst new file mode 100644 index 00000000..2588e594 --- /dev/null +++ b/docs/source/api/sebs.openwhisk.rst @@ -0,0 +1,61 @@ +sebs.openwhisk package +====================== + +Submodules +---------- + +sebs.openwhisk.config module +---------------------------- + +.. automodule:: sebs.openwhisk.config + :members: + :undoc-members: + :show-inheritance: + +sebs.openwhisk.container module +------------------------------- + +.. automodule:: sebs.openwhisk.container + :members: + :undoc-members: + :show-inheritance: + +sebs.openwhisk.function module +------------------------------ + +.. automodule:: sebs.openwhisk.function + :members: + :undoc-members: + :show-inheritance: + +sebs.openwhisk.openwhisk module +------------------------------- + +.. automodule:: sebs.openwhisk.openwhisk + :members: + :undoc-members: + :show-inheritance: + +sebs.openwhisk.storage module +----------------------------- + +.. automodule:: sebs.openwhisk.storage + :members: + :undoc-members: + :show-inheritance: + +sebs.openwhisk.triggers module +------------------------------ + +.. automodule:: sebs.openwhisk.triggers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.openwhisk + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.rst b/docs/source/api/sebs.rst new file mode 100644 index 00000000..a400e0f6 --- /dev/null +++ b/docs/source/api/sebs.rst @@ -0,0 +1,100 @@ +sebs package +============ + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + sebs.aws + sebs.azure + sebs.experiments + sebs.faas + sebs.gcp + sebs.local + sebs.openwhisk + sebs.storage + +Submodules +---------- + +sebs.benchmark module +--------------------- + +.. automodule:: sebs.benchmark + :members: + :undoc-members: + :show-inheritance: + +sebs.cache module +----------------- + +.. automodule:: sebs.cache + :members: + :undoc-members: + :show-inheritance: + +sebs.config module +------------------ + +.. automodule:: sebs.config + :members: + :undoc-members: + :show-inheritance: + +sebs.regression module +---------------------- + +.. automodule:: sebs.regression + :members: + :undoc-members: + :show-inheritance: + +sebs.sebs module +---------------- + +.. automodule:: sebs.sebs + :members: + :undoc-members: + :show-inheritance: + +sebs.statistics module +---------------------- + +.. automodule:: sebs.statistics + :members: + :undoc-members: + :show-inheritance: + +sebs.types module +----------------- + +.. automodule:: sebs.types + :members: + :undoc-members: + :show-inheritance: + +sebs.utils module +----------------- + +.. automodule:: sebs.utils + :members: + :undoc-members: + :show-inheritance: + +sebs.version module +------------------- + +.. automodule:: sebs.version + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/sebs.storage.rst b/docs/source/api/sebs.storage.rst new file mode 100644 index 00000000..dcd2f765 --- /dev/null +++ b/docs/source/api/sebs.storage.rst @@ -0,0 +1,29 @@ +sebs.storage package +==================== + +Submodules +---------- + +sebs.storage.config module +-------------------------- + +.. automodule:: sebs.storage.config + :members: + :undoc-members: + :show-inheritance: + +sebs.storage.minio module +------------------------- + +.. 
automodule:: sebs.storage.minio + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: sebs.storage + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..2fb1da58 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,35 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) +#sys.path.insert(0, os.path.abspath('../../sebs')) + +project = 'sebs' +copyright = '2024, Marcin Copik' +author = 'Marcin Copik' +release = '1.2' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode' +] + +templates_path = ['_templates'] +exclude_patterns = [] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..671a0ebc --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,18 @@ +.. SeBS documentation master file, created by + sphinx-quickstart on Sat Dec 14 03:35:29 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +SeBS documentation +================== + +Add your content using ``reStructuredText`` syntax. See the +`reStructuredText `_ +documentation for details. + + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + api/modules From d079d55963dafbff0ed3c65e69ad38b9253cbca6 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 10 Mar 2025 18:01:53 +0100 Subject: [PATCH 02/21] [azure] Remove old dead code --- sebs/azure/azure.py | 62 --------------------------------------------- 1 file changed, 62 deletions(-) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724..dd8cf850 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -606,65 +606,3 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: raise NotImplementedError() - - -# -# def create_azure_function(self, fname, config): -# -# # create function name -# region = self.config["config"]["region"] -# # only hyphens are allowed -# # and name needs to be globally unique -# func_name = fname.replace(".", "-").replace("_", "-") -# -# # create function app -# self.cli_instance.execute( -# ( -# "az functionapp create --resource-group {} " -# "--os-type Linux --consumption-plan-location {} " -# "--runtime {} --runtime-version {} --name {} " -# "--storage-account {}" -# ).format( -# self.resource_group_name, -# region, -# self.AZURE_RUNTIMES[self.language], -# self.config["config"]["runtime"][self.language], -# func_name, -# self.storage_account_name, -# ) -# ) -# logging.info("Created function app {}".format(func_name)) -# return func_name -# -# init = False -# -# def create_function_copies( -# self, -# function_names: List[str], -# code_package: Benchmark, -# experiment_config: dict, -# ): -# -# if not self.init: -# code_location = code_package.code_location -# # package = self.package_code(code_location, code_package.benchmark) -# # code_size = code_package.code_size -# # Restart Docker instance to make sure code package is mounted -# self.start(code_location, restart=True) -# self.storage_account() -# self.resource_group() -# self.init = True -# -# # names = [] -# # for fname in function_names: -# # names.append(self.create_azure_function(fname, experiment_config)) -# names = function_names -# -# # time.sleep(30) -# urls = [] -# for fname in function_names: -# url = self.publish_function(fname, repeat_on_failure=True) -# urls.append(url) -# logging.info("Published function app {} with URL {}".format(fname, url)) -# -# return names, urls From 341652221231a665098fea55374555cc6b7f778a Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 10 Mar 2025 18:04:21 +0100 Subject: [PATCH 03/21] [gcp] Remove old dead code --- sebs/gcp/gcp.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 187d8cda..815ba36f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -732,15 +732,6 @@ def deployment_version(self, func: Function) -> int: status_res = status_req.execute() return int(status_res["versionId"]) - # @abstractmethod - # def get_invocation_error(self, function_name: str, - # start_time: int, end_time: int): - # pass - - # @abstractmethod - # def download_metrics(self): - # pass - """ Helper method for recursive_zip From 445906743bf575eafa422984bec86c1d66e0c717 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 10 Mar 2025 18:05:03 +0100 Subject: [PATCH 04/21] [faas] Remove dead old code --- sebs/faas/storage.py | 46 -------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 5b93c053..92eb1445 100644 --- a/sebs/faas/storage.py +++ 
b/sebs/faas/storage.py @@ -204,52 +204,6 @@ def benchmark_data( return self.input_prefixes, self.output_prefixes - # def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): - - # benchmarks_bucket = self.benchmarks_bucket() - - # Load cached information - # cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) - # if cached_buckets: - # cache_valid = True - # for bucket in [ - # *cached_buckets["buckets"]["input"], - # *cached_buckets["buckets"]["output"], - # ]: - # if not self.exists_bucket(bucket): - # cache_valid = False - # self.logging.info(f"Cached storage buckets {bucket} does not exist.") - # break - - # if cache_valid: - # self.input_buckets = cached_buckets["buckets"]["input"] - # for bucket in self.input_buckets: - # self.input_buckets_files.append(self.list_bucket(bucket)) - # self.output_buckets = cached_buckets["buckets"]["output"] - # # for bucket in self.output_buckets: - # # self.clean_bucket(bucket) - # self.cached = True - # self.logging.info( - # "Using cached storage input buckets {}".format(self.input_buckets) - # ) - # self.logging.info( - # "Using cached storage output buckets {}".format(self.output_buckets) - # ) - # return - # else: - # self.logging.info("Cached storage buckets are no longer valid, creating new ones.") - - # buckets = self.list_buckets(self.correct_name(benchmark)) - # for i in range(0, requested_buckets[0]): - # self.input_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-input".format(benchmark, i)), buckets) - # ) - # self.input_buckets_files.append(self.list_bucket(self.input_buckets[-1])) - # for i in range(0, requested_buckets[1]): - # self.output_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-output".format(benchmark, i)), buckets) - # ) - # self.save_storage(benchmark) def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: From 5e775a151188125cb9e2784cc4fd177e328e313e Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 10 Mar 2025 18:51:39 +0100 Subject: [PATCH 05/21] [system] First batch of docstrings --- sebs/aws/aws.py | 198 ++++++-- sebs/aws/function.py | 71 ++- sebs/benchmark.py | 338 ++++++++++++-- sebs/experiments/__init__.py | 15 + sebs/experiments/config.py | 108 ++++- sebs/experiments/eviction_model.py | 116 ++++- sebs/experiments/experiment.py | 59 ++- sebs/experiments/invocation_overhead.py | 89 +++- sebs/experiments/network_ping_pong.py | 95 +++- sebs/experiments/perf_cost.py | 169 ++++++- sebs/experiments/result.py | 108 ++++- sebs/faas/function.py | 514 +++++++++++++++++++-- sebs/faas/nosql.py | 184 +++++++- sebs/faas/storage.py | 1 - sebs/faas/system.py | 368 +++++++++++---- sebs/regression.py | 574 +++++++++++++++++++++--- sebs/sebs.py | 283 +++++++++++- sebs/statistics.py | 64 ++- sebs/storage/minio.py | 313 ++++++++++++- sebs/types.py | 46 ++ sebs/utils.py | 338 +++++++++++--- 21 files changed, 3660 insertions(+), 391 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f..2aa0f5a2 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -1,3 +1,12 @@ +""" +AWS Lambda implementation for the SeBs framework. + +This module provides the AWS implementation of the FaaS System interface. +It handles deploying and managing serverless functions on AWS Lambda, +including code packaging, function creation, trigger management, and +metrics collection. +""" + import math import os import shutil @@ -25,36 +34,71 @@ class AWS(System): + """ + AWS Lambda implementation of the System interface. 
+ + This class implements the FaaS System interface for AWS Lambda, + providing methods for deploying, invoking, and managing Lambda functions. + + Attributes: + logs_client: AWS CloudWatch Logs client + cached: Whether AWS resources have been cached + _config: AWS-specific configuration + """ logs_client = None cached = False _config: AWSConfig @staticmethod - def name(): + def name() -> str: + """ + Get the name of this system. + + Returns: + str: System name ('aws') + """ return "aws" @staticmethod - def typename(): + def typename() -> str: + """ + Get the type name of this system. + + Returns: + str: Type name ('AWS') + """ return "AWS" @staticmethod def function_type() -> "Type[Function]": + """ + Get the function type for this system. + + Returns: + Type[Function]: LambdaFunction class + """ return LambdaFunction @property def config(self) -> AWSConfig: + """ + Get the AWS-specific configuration. + + Returns: + AWSConfig: AWS configuration + """ return self._config @property def system_resources(self) -> AWSSystemResources: + """ + Get the AWS system resources manager. + + Returns: + AWSSystemResources: AWS resource manager + """ return cast(AWSSystemResources, self._system_resources) - """ - :param cache_client: Function cache instance - :param config: Experiments config - :param docker_client: Docker instance - """ - def __init__( self, sebs_config: SeBSConfig, @@ -63,6 +107,16 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize the AWS system. + + Args: + sebs_config: SeBs system configuration + config: AWS-specific configuration + cache_client: Cache client for caching resources + docker_client: Docker client for building images + logger_handlers: Logging configuration + """ super().__init__( sebs_config, cache_client, @@ -75,6 +129,16 @@ def __init__( self.nosql_storage: Optional[DynamoDB] = None def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize AWS resources. + + Creates a boto3 session, initializes Lambda client, and prepares + system resources and ECR client. + + Args: + config: Additional configuration parameters + resource_prefix: Optional prefix for resource names + """ # thread-safe self.session = boto3.session.Session( aws_access_key_id=self.config.credentials.access_key, @@ -89,6 +153,12 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] ) def get_lambda_client(self): + """ + Get or create an AWS Lambda client. + + Returns: + boto3.client: Lambda client + """ if not hasattr(self, "client"): self.client = self.session.client( service_name="lambda", @@ -96,24 +166,6 @@ def get_lambda_client(self): ) return self.client - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module thus it's easier - to always create a module. - - Structure: - function - - function.py - - storage.py - - resources - handler.py - - benchmark: benchmark name - """ - def package_code( self, directory: str, @@ -124,6 +176,35 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Package code for deployment to AWS Lambda. 
+ + Creates a suitable deployment package with the following structure: + + function/ + - function.py + - storage.py + - resources/ + handler.py + + For container deployments, builds a Docker image and pushes it to ECR. + For ZIP deployments, creates a ZIP package compatible with Lambda. + + Args: + directory: Path to the code directory + language_name: Programming language name (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target CPU architecture (e.g., 'x64', 'arm64') + benchmark: Benchmark name + is_cached: Whether code is already cached + container_deployment: Whether to use container deployment + + Returns: + Tuple containing: + - Path to the packaged code (ZIP file) + - Size of the package in bytes + - Container URI (if container_deployment=True, otherwise empty string) + """ container_uri = "" @@ -163,13 +244,33 @@ def package_code( ) def _map_architecture(self, architecture: str) -> str: - + """ + Map architecture name to AWS Lambda-compatible format. + + Args: + architecture: Architecture name from SeBs (e.g., 'x64') + + Returns: + str: AWS Lambda-compatible architecture name (e.g., 'x86_64') + """ if architecture == "x64": return "x86_64" return architecture - def _map_language_runtime(self, language: str, runtime: str): - + def _map_language_runtime(self, language: str, runtime: str) -> str: + """ + Map language runtime to AWS Lambda-compatible format. + + AWS uses different naming schemes for runtime versions. + For example, Node.js uses '12.x' instead of '12'. + + Args: + language: Language name (e.g., 'nodejs', 'python') + runtime: Runtime version (e.g., '12', '3.8') + + Returns: + str: AWS Lambda-compatible runtime version + """ # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. if language == "nodejs": @@ -183,6 +284,21 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LambdaFunction": + """ + Create or update an AWS Lambda function. + + If the function already exists, it updates the code and configuration. + Otherwise, it creates a new function with the specified parameters. + + Args: + code_package: Benchmark code package + func_name: Name of the function + container_deployment: Whether to use container deployment + container_uri: URI of the container image (if container_deployment=True) + + Returns: + LambdaFunction: The created or updated Lambda function + """ package = code_package.code_location benchmark = code_package.benchmark @@ -296,17 +412,6 @@ def cached_function(self, function: Function): for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers - """ - Update function code and configuration on AWS. - - :param benchmark: benchmark name - :param name: function name - :param code_package: path to code package - :param code_size: size of code package in bytes - :param timeout: function timeout in seconds - :param memory: memory limit for function - """ - def update_function( self, function: Function, @@ -314,6 +419,19 @@ def update_function( container_deployment: bool, container_uri: str, ): + """ + Update an existing AWS Lambda function. + + Updates the function code and waits for the update to complete. + For container deployments, updates the container image. + For ZIP deployments, uploads the code package directly or via S3. 
+ + Args: + function: The function to update + code_package: Benchmark code package + container_deployment: Whether to use container deployment + container_uri: URI of the container image (if container_deployment=True) + """ name = function.name function = cast(LambdaFunction, function) diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 27aeb240..58ebd69e 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -1,3 +1,11 @@ +""" +Module for AWS Lambda function implementation in the SeBs framework. + +This module provides the LambdaFunction class, which represents an AWS Lambda +function in the serverless benchmarking suite. It handles AWS-specific attributes +and operations such as ARN, runtime, role, and serialization. +""" + from typing import cast, Optional from sebs.aws.s3 import S3 @@ -6,6 +14,20 @@ class LambdaFunction(Function): + """ + AWS Lambda function implementation for the SeBs framework. + + This class represents an AWS Lambda function in the serverless benchmarking + suite. It extends the base Function class with AWS-specific attributes and + functionality. + + Attributes: + arn: Amazon Resource Name of the Lambda function + role: IAM role ARN used by the function + runtime: Runtime environment for the function (e.g., 'python3.8') + bucket: S3 bucket name where the function code is stored + """ + def __init__( self, name: str, @@ -17,6 +39,19 @@ def __init__( cfg: FunctionConfig, bucket: Optional[str] = None, ): + """ + Initialize an AWS Lambda function. + + Args: + name: Name of the function + benchmark: Name of the benchmark + arn: Amazon Resource Name of the Lambda function + code_package_hash: Hash of the code package + runtime: Runtime environment for the function + role: IAM role ARN used by the function + cfg: Function configuration + bucket: S3 bucket name where the function code is stored + """ super().__init__(benchmark, name, code_package_hash, cfg) self.arn = arn self.role = role @@ -25,9 +60,21 @@ def __init__( @staticmethod def typename() -> str: + """ + Get the type name of this class. + + Returns: + str: The type name + """ return "AWS.LambdaFunction" def serialize(self) -> dict: + """ + Serialize the Lambda function to a dictionary. + + Returns: + dict: Dictionary representation of the Lambda function + """ return { **super().serialize(), "arn": self.arn, @@ -38,6 +85,18 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": + """ + Create a LambdaFunction instance from a cached configuration. + + Args: + cached_config: Dictionary containing the cached function configuration + + Returns: + LambdaFunction: A new instance with the deserialized data + + Raises: + AssertionError: If an unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.aws.triggers import LibraryTrigger, HTTPTrigger @@ -61,6 +120,16 @@ def deserialize(cached_config: dict) -> "LambdaFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: S3): + def code_bucket(self, benchmark: str, storage_client: S3) -> str: + """ + Get the S3 bucket for the function code. 
+ + Args: + benchmark: Name of the benchmark + storage_client: S3 storage client + + Returns: + str: Name of the S3 bucket + """ self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820..b64d8082 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -1,3 +1,11 @@ +""" +Module for handling benchmarks in the Serverless Benchmarking Suite (SeBS). + +This module provides classes for benchmark configuration, code packaging, and execution. +It handles the preparation of code packages with dependencies for deployment to +various serverless platforms, including caching mechanisms to avoid redundant builds. +""" + import glob import hashlib import json @@ -23,9 +31,32 @@ class BenchmarkConfig: + """ + Configuration for a benchmark in the Serverless Benchmarking Suite. + + This class stores the configuration parameters for a benchmark, including + timeout, memory allocation, supported languages, and included modules. + + Attributes: + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + languages: List of supported programming languages + modules: List of benchmark modules/features required + + """ + def __init__( self, timeout: int, memory: int, languages: List["Language"], modules: List[BenchmarkModule] ): + """ + Initialize a benchmark configuration. + + Args: + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + languages: List of supported programming languages + modules: List of benchmark modules/features required + """ self._timeout = timeout self._memory = memory self._languages = languages @@ -33,31 +64,75 @@ def __init__( @property def timeout(self) -> int: + """ + Get the maximum execution time in seconds. + + Returns: + int: The timeout value + """ return self._timeout @timeout.setter def timeout(self, val: int): + """ + Set the maximum execution time in seconds. + + Args: + val: The new timeout value + """ self._timeout = val @property def memory(self) -> int: + """ + Get the memory allocation in MB. + + Returns: + int: The memory allocation + """ return self._memory @memory.setter def memory(self, val: int): + """ + Set the memory allocation in MB. + + Args: + val: The new memory allocation value + """ self._memory = val @property def languages(self) -> List["Language"]: + """ + Get the list of supported programming languages. + + Returns: + List[Language]: Supported programming languages + """ return self._languages @property def modules(self) -> List[BenchmarkModule]: + """ + Get the list of benchmark modules/features required. + + Returns: + List[BenchmarkModule]: Required benchmark modules + """ return self._modules - # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": + """ + Create a BenchmarkConfig instance from a JSON object. + + Args: + json_object: Dictionary containing benchmark configuration + + Returns: + BenchmarkConfig: A new instance with the deserialized data + """ from sebs.faas.function import Language return BenchmarkConfig( @@ -68,110 +143,270 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": ) -""" - Creates code package representing a benchmark with all code and assets - prepared and dependency install performed within Docker image corresponding - to the cloud deployment. +class Benchmark(LoggingBase): + """ + Creates code package representing a benchmark with all code and assets. 
+ + This class handles building, packaging, and deploying benchmark code for + serverless platforms. It manages dependencies installation within Docker + images corresponding to the target cloud deployment. The behavior of the class depends on cache state: - 1) First, if there's no cache entry, a code package is built. - 2) Otherwise, the hash of the entire benchmark is computed and compared - with the cached value. If changed, then rebuilt then benchmark. - 3) Otherwise, just return the path to cache code. -""" + 1. If there's no cache entry, a code package is built + 2. Otherwise, the hash of the entire benchmark is computed and compared + with the cached value. If changed, it rebuilds the benchmark + 3. Otherwise, it returns the path to cached code + + Attributes: + benchmark: Name of the benchmark + benchmark_path: Path to the benchmark directory + benchmark_config: Configuration for the benchmark + code_package: Dictionary with code package information + functions: Dictionary of functions for this benchmark + code_location: Location of the code package + is_cached: Whether the benchmark is cached + is_cached_valid: Whether the cached benchmark is valid + code_size: Size of the code package in bytes + container_uri: URI of the container for container deployments + language: Programming language for the benchmark + language_name: Name of the programming language + language_version: Version of the programming language + has_input_processed: Whether input processing has been performed + uses_storage: Whether the benchmark uses cloud storage + uses_nosql: Whether the benchmark uses NoSQL databases + architecture: CPU architecture of the deployment target + container_deployment: Whether using container deployment + """ -class Benchmark(LoggingBase): @staticmethod def typename() -> str: + """ + Get the type name of this class. + + Returns: + str: The type name + """ return "Benchmark" @property - def benchmark(self): + def benchmark(self) -> str: + """ + Get the benchmark name. + + Returns: + str: Name of the benchmark + """ return self._benchmark @property - def benchmark_path(self): + def benchmark_path(self) -> str: + """ + Get the path to the benchmark directory. + + Returns: + str: Path to the benchmark directory + """ return self._benchmark_path @property def benchmark_config(self) -> BenchmarkConfig: + """ + Get the benchmark configuration. + + Returns: + BenchmarkConfig: Configuration for the benchmark + """ return self._benchmark_config @property def code_package(self) -> dict: + """ + Get the code package information. + + Returns: + dict: Dictionary with code package information + """ return self._code_package @property def functions(self) -> Dict[str, Any]: + """ + Get the functions for this benchmark. + + Returns: + Dict[str, Any]: Dictionary of functions + """ return self._functions @property - def code_location(self): + def code_location(self) -> str: + """ + Get the location of the code package. + + Returns: + str: Path to the code package + """ if self.code_package: return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) else: return self._code_location @property - def is_cached(self): + def is_cached(self) -> bool: + """ + Check if the benchmark is cached. + + Returns: + bool: True if cached, False otherwise + """ return self._is_cached @is_cached.setter def is_cached(self, val: bool): + """ + Set whether the benchmark is cached. 
+ + Args: + val: True if cached, False otherwise + """ self._is_cached = val @property - def is_cached_valid(self): + def is_cached_valid(self) -> bool: + """ + Check if the cached benchmark is valid. + + Returns: + bool: True if valid, False otherwise + """ return self._is_cached_valid @is_cached_valid.setter def is_cached_valid(self, val: bool): + """ + Set whether the cached benchmark is valid. + + Args: + val: True if valid, False otherwise + """ self._is_cached_valid = val @property - def code_size(self): + def code_size(self) -> int: + """ + Get the size of the code package in bytes. + + Returns: + int: Size in bytes + """ return self._code_size @property def container_uri(self) -> str: + """ + Get the URI of the container for container deployments. + + Returns: + str: Container URI + + Raises: + AssertionError: If container URI is None + """ assert self._container_uri is not None return self._container_uri @property def language(self) -> "Language": + """ + Get the programming language for the benchmark. + + Returns: + Language: Programming language + """ return self._language @property def language_name(self) -> str: + """ + Get the name of the programming language. + + Returns: + str: Name of the language + """ return self._language.value @property - def language_version(self): + def language_version(self) -> str: + """ + Get the version of the programming language. + + Returns: + str: Version of the language + """ return self._language_version @property def has_input_processed(self) -> bool: + """ + Check if input processing has been performed. + + Returns: + bool: True if processed, False otherwise + """ return self._input_processed @property def uses_storage(self) -> bool: + """ + Check if the benchmark uses cloud storage. + + Returns: + bool: True if using storage, False otherwise + """ return self._uses_storage @property def uses_nosql(self) -> bool: + """ + Check if the benchmark uses NoSQL databases. + + Returns: + bool: True if using NoSQL, False otherwise + """ return self._uses_nosql @property def architecture(self) -> str: + """ + Get the CPU architecture of the deployment target. + + Returns: + str: Architecture name (e.g., 'x86_64', 'arm64') + """ return self._architecture @property - def container_deployment(self): + def container_deployment(self) -> bool: + """ + Check if using container deployment. + + Returns: + bool: True if using container deployment, False otherwise + """ return self._container_deployment @property # noqa: A003 - def hash(self): + def hash(self) -> str: + """ + Get the hash of the benchmark code. + + Computes an MD5 hash of the benchmark directory to determine if + the code has changed since the last build. + + Returns: + str: MD5 hash as a hexadecimal string + """ path = os.path.join(self.benchmark_path, self.language_name) self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @@ -179,7 +414,12 @@ def hash(self): @hash.setter # noqa: A003 def hash(self, val: str): """ + Set the hash of the benchmark code. + Used only for testing purposes. + + Args: + val: MD5 hash as a hexadecimal string """ self._hash_value = val @@ -193,6 +433,25 @@ def __init__( cache_client: Cache, docker_client: docker.client, ): + """ + Initialize a Benchmark instance. + + Sets up a benchmark for a specific deployment platform, including configuration, + language runtime, and caching. Loads the benchmark configuration from the JSON file + and validates the language support. 
+ + Args: + benchmark: Name of the benchmark + deployment_name: Name of the deployment platform (e.g., 'aws', 'azure') + config: Experiment configuration + system_config: SeBs system configuration + output_dir: Directory for output files + cache_client: Cache client for caching code packages + docker_client: Docker client for building dependencies + + Raises: + RuntimeError: If the benchmark is not found or doesn't support the language + """ super().__init__() self._benchmark = benchmark self._deployment_name = deployment_name @@ -232,7 +491,6 @@ def __init__( self._is_cached_valid = False # Load input module - self._benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") self._benchmark_input_module = load_benchmark_input(self._benchmark_path) @@ -241,13 +499,23 @@ def __init__( self._uses_storage: bool = False self._uses_nosql: bool = False - """ - Compute MD5 hash of an entire directory. - """ - @staticmethod - def hash_directory(directory: str, deployment: str, language: str): - + def hash_directory(directory: str, deployment: str, language: str) -> str: + """ + Compute MD5 hash of an entire directory. + + Calculates a hash of the benchmark source code by combining hashes of all + relevant files. This includes language-specific files, deployment wrappers, + and shared files like shell scripts and JSON configuration. + + Args: + directory: Path to the directory to hash + deployment: Name of the deployment platform + language: Programming language name + + Returns: + str: MD5 hash as a hexadecimal string + """ hash_sum = hashlib.md5() FILES = { "python": ["*.py", "requirements.txt*"], @@ -272,10 +540,22 @@ def hash_directory(directory: str, deployment: str, language: str): return hash_sum.hexdigest() def serialize(self) -> dict: + """ + Serialize the benchmark to a dictionary. + + Returns: + dict: Dictionary containing size and hash of the benchmark code + """ return {"size": self.code_size, "hash": self.hash} - def query_cache(self): - + def query_cache(self) -> None: + """ + Query the cache for existing benchmark code packages and functions. + + Checks if there's a cached code package or container for this benchmark + and deployment combination. Updates the cache status fields based on + whether the cache exists and if it's still valid (hash matches). + """ if self.container_deployment: self._code_package = self._cache_client.get_container( deployment=self._deployment_name, diff --git a/sebs/experiments/__init__.py b/sebs/experiments/__init__.py index ff820d40..64e82c00 100644 --- a/sebs/experiments/__init__.py +++ b/sebs/experiments/__init__.py @@ -1,3 +1,18 @@ +"""Experiment implementations for serverless benchmarking. + +This package provides a collection of experiment implementations for +measuring various aspects of serverless function performance: + +- PerfCost: Measures performance and cost characteristics +- NetworkPingPong: Measures network latency and throughput +- EvictionModel: Measures container eviction patterns +- InvocationOverhead: Measures function invocation overhead + +Each experiment is designed to evaluate specific aspects of serverless +platforms, enabling detailed comparison between different providers, +configurations, and workloads. 
+""" + from .result import Result as ExperimentResult # noqa from .experiment import Experiment # noqa from .perf_cost import PerfCost # noqa diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 26aea9f2..fa516626 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,10 +1,42 @@ +"""Configuration management for benchmark experiments. + +This module provides the configuration class for benchmark experiments, +handling settings such as: +- Runtime environment (language, version) +- Architecture (x64, arm64) +- Deployment type (container, package) +- Code and storage update flags +- Experiment-specific settings + +The Config class handles serialization and deserialization of experiment +configurations, allowing them to be loaded from and saved to configuration files. +""" + from typing import Dict from sebs.faas.function import Runtime class Config: + """Configuration class for benchmark experiments. + + This class manages the configuration settings for benchmark experiments, + including runtime environment, architecture, deployment type, and + experiment-specific settings. + + Attributes: + _update_code: Whether to update function code + _update_storage: Whether to update storage resources + _container_deployment: Whether to use container-based deployment + _download_results: Whether to download experiment results + _architecture: CPU architecture (e.g., "x64", "arm64") + _flags: Dictionary of boolean flags for custom settings + _experiment_configs: Dictionary of experiment-specific settings + _runtime: Runtime environment (language and version) + """ + def __init__(self): + """Initialize a new experiment configuration with default values.""" self._update_code: bool = False self._update_storage: bool = False self._container_deployment: bool = False @@ -16,35 +48,92 @@ def __init__(self): @property def update_code(self) -> bool: + """Get whether to update function code. + + Returns: + True if function code should be updated, False otherwise + """ return self._update_code @update_code.setter def update_code(self, val: bool): + """Set whether to update function code. + + Args: + val: True if function code should be updated, False otherwise + """ self._update_code = val @property def update_storage(self) -> bool: + """Get whether to update storage resources. + + Returns: + True if storage resources should be updated, False otherwise + """ return self._update_storage def check_flag(self, key: str) -> bool: + """Check if a flag is set. + + Args: + key: Name of the flag to check + + Returns: + Value of the flag, or False if the flag is not set + """ return False if key not in self._flags else self._flags[key] @property def runtime(self) -> Runtime: + """Get the runtime environment. + + Returns: + Runtime environment (language and version) + """ return self._runtime @property def architecture(self) -> str: + """Get the CPU architecture. + + Returns: + CPU architecture (e.g., "x64", "arm64") + """ return self._architecture @property def container_deployment(self) -> bool: + """Get whether to use container-based deployment. + + Returns: + True if container-based deployment should be used, False otherwise + """ return self._container_deployment def experiment_settings(self, name: str) -> dict: + """Get settings for a specific experiment. 
+ + Args: + name: Name of the experiment + + Returns: + Dictionary of experiment-specific settings + + Raises: + KeyError: If the experiment name is not found in the configuration + """ return self._experiment_configs[name] def serialize(self) -> dict: + """Serialize the configuration to a dictionary. + + This method converts the configuration object to a dictionary + that can be saved to a file or passed to other components. + + Returns: + Dictionary representation of the configuration + """ out = { "update_code": self._update_code, "update_storage": self._update_storage, @@ -60,7 +149,22 @@ def serialize(self) -> dict: # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(config: dict) -> "Config": - + """Deserialize a configuration from a dictionary. + + This method creates a new configuration object from a dictionary + representation, which may have been loaded from a file or passed + from another component. + + Args: + config: Dictionary representation of the configuration + + Returns: + A new configuration object with settings from the dictionary + + Note: + This method requires Python 3.7+ for proper type annotations. + The string type annotation is a forward reference to the Config class. + """ cfg = Config() cfg._update_code = config["update_code"] cfg._update_storage = config["update_storage"] @@ -70,6 +174,7 @@ def deserialize(config: dict) -> "Config": cfg._flags = config["flags"] if "flags" in config else {} cfg._architecture = config["architecture"] + # Import experiment types here to avoid circular import from sebs.experiments import ( NetworkPingPong, PerfCost, @@ -77,6 +182,7 @@ def deserialize(config: dict) -> "Config": EvictionModel, ) + # Load experiment-specific settings if present for exp in [NetworkPingPong, PerfCost, InvocationOverhead, EvictionModel]: if exp.name() in config: cfg._experiment_configs[exp.name()] = config[exp.name()] diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 30fe7274..766a7615 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -1,3 +1,15 @@ +"""Container eviction model experiment implementation. + +This module provides the EvictionModel experiment implementation, which +measures how serverless platforms manage function container eviction. +It determines how long idle containers are kept alive before being +recycled by the platform, which affects cold start frequency. + +The experiment involves invoking functions at increasing time intervals +and observing when cold starts occur, thus inferring the platform's +container caching and eviction policies. +""" + import logging import os import time @@ -17,45 +29,91 @@ class EvictionModel(Experiment): - + """Container eviction model experiment. + + This experiment measures how serverless platforms manage function + container eviction. It determines how long idle containers are kept + alive before being recycled by the platform, which affects cold start + frequency. + + The experiment invokes functions at different time intervals (defined + in the 'times' list) and observes when cold starts occur, thus inferring + the platform's container caching and eviction policies. 
+ + Attributes: + times: List of time intervals (in seconds) between invocations + _function: Function to invoke + _trigger: Trigger to use for invocation + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + + # Time intervals (in seconds) between invocations + # Uncomment additional intervals as needed for longer tests times = [ - 1, - # 2, - # 4, - # 8, - # 15, - # 30, - # 60, - # 120, - # 180, - # 240, - # 300, - # 360, - # 480, - # 600, - # 720, - # 900, - # 1080, - # 1200, + 1, # 1 second + # 2, # 2 seconds + # 4, # 4 seconds + # 8, # 8 seconds + # 15, # 15 seconds + # 30, # 30 seconds + # 60, # 1 minute + # 120, # 2 minutes + # 180, # 3 minutes + # 240, # 4 minutes + # 300, # 5 minutes + # 360, # 6 minutes + # 480, # 8 minutes + # 600, # 10 minutes + # 720, # 12 minutes + # 900, # 15 minutes + # 1080, # 18 minutes + # 1200, # 20 minutes ] # TODO: temporal fix # function_copies_per_time = 5 function_copies_per_time = 1 def __init__(self, config: ExperimentConfig): + """Initialize a new EvictionModel experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "eviction-model" + """ return "eviction-model" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.EvictionModel" + """ return "Experiment.EvictionModel" @staticmethod def accept_replies(port: int, invocations: int): - + """Accept TCP connections from functions and respond to them. + + This static method acts as a TCP server, accepting connections from + functions and responding to them. It runs two rounds of connection + acceptance to ensure functions receive a response. The method logs + all activity to a file. + + Args: + port: TCP port to listen on + invocations: Number of expected function invocations + """ with open(f"server_{invocations}.log", "w") as f: import socket @@ -177,21 +235,37 @@ def process_function( return final_results def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): - + """Prepare the experiment for execution. + + This method sets up the benchmark, functions, and output directory for + the experiment. It creates a separate function for each time interval + and copy combination, allowing for parallel testing of different + eviction times. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Get the server-reply benchmark self._benchmark = sebs_client.get_benchmark( "040.server-reply", deployment_client, self.config ) self._deployment_client = deployment_client self._result = ExperimentResult(self.config, deployment_client.config) + + # Create function names for each time interval and copy name = deployment_client.default_function_name(self._benchmark) self.functions_names = [ f"{name}-{time}-{copy}" for time in self.times for copy in range(self.function_copies_per_time) ] + + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "eviction-model") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) + self.functions = [] for fname in self.functions_names: diff --git a/sebs/experiments/experiment.py b/sebs/experiments/experiment.py index ee5a456f..5d50ead6 100644 --- a/sebs/experiments/experiment.py +++ b/sebs/experiments/experiment.py @@ -1,3 +1,18 @@ +"""Base abstract class for implementing serverless benchmark experiments. 
+ +This module provides the base Experiment abstract class that defines the common +interface and functionality for all benchmark experiments in the serverless +benchmarking suite. Each experiment type inherits from this class and implements +its specific logic for executing benchmarks, measuring performance, and analyzing +results. + +The Experiment class handles: +- Configuration management +- Parallel invocation coordination +- Logging setup +- Type and name identification for experiments +""" + from abc import ABC from abc import abstractmethod from multiprocessing import Semaphore @@ -9,7 +24,26 @@ class Experiment(ABC, LoggingBase): + """Abstract base class for all serverless benchmark experiments. + + This class provides the common functionality and interface for all + experiment implementations. It manages configuration, handles logging, + and defines the abstract methods that must be implemented by specific + experiment types. + + Attributes: + config: Experiment configuration settings + _threads: Number of concurrent threads to use for the experiment + _invocations: Number of function invocations to perform + _invocation_barrier: Semaphore for coordinating parallel invocations + """ + def __init__(self, cfg: ExperimentConfig): + """Initialize a new experiment. + + Args: + cfg: Experiment configuration settings + """ super().__init__() self._config = cfg self._threads = 1 @@ -17,15 +51,38 @@ def __init__(self, cfg: ExperimentConfig): self._invocation_barrier = Semaphore(self._invocations) @property - def config(self): + def config(self) -> ExperimentConfig: + """Get the experiment configuration. + + Returns: + The experiment configuration + """ return self._config @staticmethod @abstractmethod def name() -> str: + """Get the name of the experiment. + + This method must be implemented by all subclasses to return + a unique name for the experiment type, which is used for + configuration and identification. + + Returns: + A string name for the experiment + """ pass @staticmethod @abstractmethod def typename() -> str: + """Get the type name of the experiment. + + This method must be implemented by all subclasses to return + a human-readable type name for the experiment, which is used + for display and reporting. + + Returns: + A string type name for the experiment + """ pass diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index d7fc56f7..083e6bd4 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -1,3 +1,18 @@ +"""Invocation overhead measurement experiment implementation. + +This module provides the InvocationOverhead experiment implementation, which +measures the overhead associated with invoking serverless functions. It can +measure: + +- Overhead of different invocation methods (HTTP, SDK) +- Impact of code package size on deployment and invocation time +- Overhead of different input data sizes +- Cold vs. warm start invocation times + +The experiment is designed to help identify performance bottlenecks and +optimize function deployment and invocation. +""" + import csv import os import random @@ -15,10 +30,33 @@ class CodePackageSize: + """Helper class for code package size experiments. + + This class handles creating and deploying functions with different code + package sizes to measure the impact of package size on deployment and + invocation overhead. 
+ + Attributes: + _benchmark_path: Path to the benchmark code + _benchmark: Benchmark instance + _deployment_client: Deployment client to use + sizes: List of code package sizes to test + functions: Dictionary mapping size to function instances + """ + def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings: dict): + """Initialize a new code package size experiment. + + Args: + deployment_client: Deployment client to use + benchmark: Benchmark instance + settings: Experiment settings with code_package_begin, code_package_end, + and code_package_points values + """ import math from numpy import linspace + # Generate code package sizes to test points = linspace( settings["code_package_begin"], settings["code_package_end"], @@ -26,6 +64,7 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings ) from sebs.utils import find_benchmark + # Use the clock synchronization benchmark as a base self._benchmark_path = find_benchmark("030.clock-synchronization", "benchmarks") self._benchmark = benchmark random.seed(1410) @@ -65,23 +104,59 @@ def before_sample(self, size: int, input_benchmark: dict): class InvocationOverhead(Experiment): + """Invocation overhead measurement experiment. + + This experiment measures the overhead associated with invoking serverless + functions. It can measure the impact of code package size, input data size, + and different invocation methods on performance. + + Attributes: + settings: Experiment-specific settings + _benchmark: Benchmark to use + benchmark_input: Input data for the benchmark + _storage: Storage service to use + _function: Function to invoke + _code_package: Code package size experiment helper + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new InvocationOverhead experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) self.settings = self.config.experiment_settings(self.name()) def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): - - # deploy network test function + """Prepare the experiment for execution. + + This method sets up the benchmark, function, storage, and output directory + for the experiment. It uses the clock-synchronization benchmark as a base + and prepares the necessary resources for measuring invocation overhead. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Import needed modules from sebs import SeBS # noqa from sebs.faas.function import Trigger + # Get the clock-synchronization benchmark self._benchmark = sebs_client.get_benchmark( "030.clock-synchronization", deployment_client, self.config ) + # Prepare benchmark input self.benchmark_input = self._benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) + + # Get storage for testing self._storage = deployment_client.system_resources.get_storage(replace_existing=True) self._function = deployment_client.get_function(self._benchmark) @@ -306,8 +381,18 @@ def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "invocation-overhead" + """ return "invocation-overhead" @staticmethod def typename() -> str: + """Get the type name of the experiment. 
+ + Returns: + The type name "Experiment.InvocOverhead" + """ return "Experiment.InvocOverhead" diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index 6c44f848..d4995ae1 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -1,3 +1,16 @@ +"""Network latency and throughput measurement experiment implementation. + +This module provides the NetworkPingPong experiment implementation, which +measures network latency and throughput characteristics between client and +serverless functions, as well as between serverless functions and storage +services. It can determine: + +- Client-to-function latency +- Function-to-storage latency +- Network throughput for different payload sizes +- Variation in network performance over time +""" + import csv import socket import os @@ -20,35 +33,79 @@ class NetworkPingPong(Experiment): + """Network latency and throughput measurement experiment. + + This experiment measures the network performance characteristics + between the client, serverless functions, and storage services. + It can measure ping-pong latency and throughput with different + payload sizes and concurrency levels. + + Attributes: + benchmark_input: Input configuration for the benchmark + _storage: Storage service to use for testing + _function: Function to invoke + _triggers: Dictionary of triggers by type + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new NetworkPingPong experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): - + """Prepare the experiment for execution. + + This method sets up the benchmark, function, triggers, storage, and output + directory for the experiment. It creates or gets the function and + its HTTP trigger, and prepares the input data for the benchmark. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Get the network benchmark benchmark = sebs_client.get_benchmark( "020.network-benchmark", deployment_client, self.config ) + # Prepare benchmark input self.benchmark_input = benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) + + # Get storage for testing storage latency self._storage = deployment_client.system_resources.get_storage(replace_existing=True) + # Get or create function self._function = deployment_client.get_function(benchmark) + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "network-ping-pong") if not os.path.exists(self._out_dir): # shutil.rmtree(self._out_dir) os.mkdir(self._out_dir) + # Make sure there's an HTTP trigger triggers = self._function.triggers(Trigger.TriggerType.HTTP) if len(triggers) == 0: deployment_client.create_trigger(self._function, Trigger.TriggerType.HTTP) def run(self): - + """Run the network ping-pong experiment. + + This method executes the experiment, measuring network latency and + throughput between the client and the serverless function. It first + determines the client's public IP address to include in the results. 
+ """ from requests import get + # Get the client's public IP address ip = get("http://checkip.amazonaws.com/").text.rstrip() settings = self.config.experiment_settings(self.name()) invocations = settings["invocations"] @@ -67,8 +124,16 @@ def run(self): self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) def process(self, directory: str): - - full_data: Dict[str, pd.Dataframe] = {} + """Process the experiment results. + + This method processes the CSV files generated during the experiment + execution, computes round-trip times (RTT), and generates summary + statistics and a histogram of the RTT distribution. + + Args: + directory: Directory containing the experiment results + """ + full_data: Dict[str, pd.DataFrame] = {} for f in glob.glob(os.path.join(directory, "network-ping-pong", "*.csv")): request_id = os.path.basename(f).split("-", 1)[1].split(".")[0] @@ -94,7 +159,17 @@ def process(self, directory: str): fig.savefig(os.path.join(directory, "histogram.png")) def receive_datagrams(self, repetitions: int, port: int, ip: str): - + """Receive UDP datagrams from the function and respond to them. + + This method acts as a UDP server, receiving datagrams from the function + and responding to them. It measures the timestamps of packet reception + and response, and records them for later analysis. + + Args: + repetitions: Number of repetitions to execute + port: UDP port to listen on + ip: IP address to include in the function invocation input + """ print(f"Starting invocation with {repetitions} repetitions on port {port}") socket.setdefaulttimeout(2) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) @@ -143,8 +218,18 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str): @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "network-ping-pong" + """ return "network-ping-pong" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.NetworkPingPong" + """ return "Experiment.NetworkPingPong" diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8d..fdb61a6b 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -1,3 +1,18 @@ +"""Performance and cost measurement experiment implementation. + +This module provides the PerfCost experiment implementation, which measures +the performance characteristics and execution costs of serverless functions. +It can run several experiment types: + +- Cold: Measures cold start performance by enforcing container recreation +- Warm: Measures warm execution performance with reused containers +- Burst: Measures performance under concurrent burst load +- Sequential: Measures performance with sequential invocations + +The experiment collects detailed metrics about execution time, memory usage, +and costs, and provides statistical analysis of the results. +""" + import json import os import time @@ -19,44 +34,103 @@ class PerfCost(Experiment): + """Performance and cost measurement experiment. + + This experiment measures the performance characteristics and execution + costs of serverless functions under different execution conditions. + It can measure cold starts, warm execution, burst load, and sequential + execution patterns. + + The experiment can be configured to run with different memory sizes, + allowing for comparison of performance across different resource allocations. 
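+
+    A minimal, purely illustrative usage sketch (assumes already initialized
+    SeBS and deployment clients):
+
+        experiment = PerfCost(config)
+        experiment.prepare(sebs_client, deployment_client)
+        experiment.run()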
+ + Attributes: + _benchmark: The benchmark to execute + _benchmark_input: The input data for the benchmark + _function: The function to invoke + _trigger: The trigger to use for invocation + _out_dir: Directory for storing results + _deployment_client: The deployment client to use + _sebs_client: The SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new PerfCost experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "perf-cost" + """ return "perf-cost" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.PerfCost" + """ return "Experiment.PerfCost" class RunType(Enum): + """Types of experiment runs. + + This enum defines the different types of experiment runs: + - WARM: Measure warm execution performance (reused containers) + - COLD: Measure cold start performance (new containers) + - BURST: Measure performance under concurrent burst load + - SEQUENTIAL: Measure performance with sequential invocations + """ + WARM = 0 COLD = 1 BURST = 2 SEQUENTIAL = 3 def str(self) -> str: + """Get the string representation of the run type. + + Returns: + The lowercase name of the run type + """ return self.name.lower() def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): - - # create benchmark instance + """Prepare the experiment for execution. + + This method sets up the benchmark, function, trigger, and output + directory for the experiment. It creates or gets the function and + its HTTP trigger, and prepares the input data for the benchmark. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Create benchmark instance settings = self.config.experiment_settings(self.name()) self._benchmark = sebs_client.get_benchmark( settings["benchmark"], deployment_client, self.config ) - # prepare benchmark input + # Prepare benchmark input self._benchmark_input = self._benchmark.prepare_input( deployment_client.system_resources, size=settings["input-size"], replace_existing=self.config.update_storage, ) + # Get or create function self._function = deployment_client.get_function(self._benchmark) - # add HTTP trigger + # Add HTTP trigger if not already present triggers = self._function.triggers(Trigger.TriggerType.HTTP) if len(triggers) == 0: self._trigger = deployment_client.create_trigger( @@ -65,33 +139,62 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): else: self._trigger = triggers[0] + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "perf-cost") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) + + # Save clients for later use self._deployment_client = deployment_client self._sebs_client = sebs_client def run(self): - + """Run the experiment. + + This method runs the experiment with the configured settings. + If memory sizes are specified, it runs the experiment for each + memory size, updating the function configuration accordingly. + Otherwise, it runs the experiment once with the default memory + configuration. 
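+
+        The settings consulted here and in run_configuration include, as a
+        purely illustrative sketch (values are placeholders):
+
+            {
+                "repetitions": 50,
+                "memory-sizes": [128, 256],
+                "experiments": ["cold", "warm"]
+            }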
+ """ settings = self.config.experiment_settings(self.name()) - # Execution on systems where memory configuration is not provided + # Get memory sizes to test memory_sizes = settings["memory-sizes"] + + # Run with default memory if no specific sizes are provided if len(memory_sizes) == 0: - self.logging.info("Begin experiment") + self.logging.info("Begin experiment with default memory configuration") self.run_configuration(settings, settings["repetitions"]) + + # Run for each specified memory size for memory in memory_sizes: self.logging.info(f"Begin experiment on memory size {memory}") + # Update function memory configuration self._function.config.memory = memory self._deployment_client.update_function(self._function, self._benchmark, False, "") self._sebs_client.cache_client.update_function(self._function) + # Run experiment with this memory configuration self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) def compute_statistics(self, times: List[float]): - + """Compute statistical analysis of execution times. + + This method computes basic statistics (mean, median, standard deviation, + coefficient of variation) and confidence intervals for the given times. + It computes both parametric (Student's t-distribution) and non-parametric + confidence intervals. + + Args: + times: List of execution times in milliseconds + """ + # Compute basic statistics mean, median, std, cv = basic_stats(times) self.logging.info(f"Mean {mean} [ms], median {median} [ms], std {std}, CV {cv}") + + # Compute confidence intervals for different confidence levels for alpha in [0.95, 0.99]: + # Parametric confidence interval (Student's t-distribution) ci_interval = ci_tstudents(alpha, times) interval_width = ci_interval[1] - ci_interval[0] ratio = 100 * interval_width / mean / 2.0 @@ -100,6 +203,8 @@ def compute_statistics(self, times: List[float]): f"{ci_interval[0]} to {ci_interval[1]}, within {ratio}% of mean" ) + # Non-parametric confidence interval (Le Boudec's method) + # Only compute if we have enough samples (> 20) if len(times) > 20: ci_interval = ci_le_boudec(alpha, times) interval_width = ci_interval[1] - ci_interval[0] @@ -117,6 +222,20 @@ def _run_configuration( repetitions: int, suffix: str = "", ): + """Run a specific experiment configuration. + + This method executes the experiment with the specified run type, + collecting and recording the results. It handles different run types + (cold, warm, burst, sequential) appropriately, enforcing cold starts + when needed and collecting execution statistics. + + Args: + run_type: Type of run (cold, warm, burst, sequential) + settings: Experiment settings + invocations: Number of concurrent invocations + repetitions: Total number of repetitions to run + suffix: Optional suffix for output file names (e.g., memory size) + """ # Randomize starting value to ensure that it's not the same # as in the previous run. @@ -227,9 +346,24 @@ def _run_configuration( ) def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): - + """Run experiments for each configured experiment type. + + This method runs the experiment for each experiment type specified + in the settings. It dispatches to the appropriate run type handler + for each experiment type. 
+ + Args: + settings: Experiment settings + repetitions: Number of repetitions to run + suffix: Optional suffix for output file names (e.g., memory size) + + Raises: + RuntimeError: If an unknown experiment type is specified + """ + # Run each configured experiment type for experiment_type in settings["experiments"]: if experiment_type == "cold": + # Cold start experiments - enforce container recreation self._run_configuration( PerfCost.RunType.COLD, settings, @@ -238,6 +372,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "warm": + # Warm execution experiments - reuse containers self._run_configuration( PerfCost.RunType.WARM, settings, @@ -246,6 +381,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "burst": + # Burst load experiments - concurrent invocations self._run_configuration( PerfCost.RunType.BURST, settings, @@ -254,6 +390,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "sequential": + # Sequential invocation experiments - one at a time self._run_configuration( PerfCost.RunType.SEQUENTIAL, settings, 1, repetitions, suffix ) @@ -268,6 +405,20 @@ def process( logging_filename: str, extend_time_interval: int, ): + """Process experiment results and generate a CSV report. + + This method processes the experiment results, downloads additional + metrics if needed, and generates a CSV report with the results. + The report includes memory usage, execution times, and other metrics + for each experiment type and invocation. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + directory: Directory where results are stored + logging_filename: Filename for logs + extend_time_interval: Time interval to extend metrics retrieval by (in minutes) + """ import glob import csv diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index b28de75c..9a8d22f2 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -1,3 +1,15 @@ +"""Experiment result collection and management. + +This module provides the Result class for managing experiment results, including: +- Function invocation results +- Metrics from cloud providers +- Experiment start and end times +- Configuration information + +The Result class handles serialization, deserialization, and analysis of +experiment results, making it easier to process and visualize the data. +""" + from datetime import datetime from typing import Dict, List, Optional, Tuple # noqa @@ -9,6 +21,23 @@ class Result: + """Experiment result collection and management. + + This class stores and manages the results of experiments, including function + invocation results, metrics from cloud providers, and configuration information. + It provides methods for adding invocation results, retrieving metrics, and + serializing/deserializing results. 
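+
+    A minimal, purely illustrative usage sketch (func and execution_result
+    stand for a Function and an ExecutionResult):
+
+        result = Result(experiment_config, deployment_config)
+        result.begin()
+        result.add_invocation(func, execution_result)
+        result.end()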
+ + Attributes: + config: Dictionary containing experiment and deployment configurations + _invocations: Dictionary mapping function names to invocation results + _metrics: Dictionary mapping function names to metrics + _start_time: Experiment start time + _end_time: Experiment end time + result_bucket: Optional bucket name for storing results + logging_handlers: Logging handlers for the result + """ + def __init__( self, experiment_config: ExperimentConfig, @@ -17,6 +46,15 @@ def __init__( metrics: Optional[Dict[str, dict]] = None, result_bucket: Optional[str] = None, ): + """Initialize a new experiment result. + + Args: + experiment_config: Experiment configuration + deployment_config: Deployment configuration + invocations: Optional dictionary of function invocation results + metrics: Optional dictionary of function metrics + result_bucket: Optional bucket name for storing results + """ self.config = { "experiments": experiment_config, "deployment": deployment_config, @@ -32,42 +70,110 @@ def __init__( self.result_bucket = result_bucket def begin(self): + """Mark the beginning of the experiment. + + This method records the start time of the experiment. + """ self.begin_time = datetime.now().timestamp() def end(self): + """Mark the end of the experiment. + + This method records the end time of the experiment. + """ self.end_time = datetime.now().timestamp() def times(self) -> Tuple[int, int]: + """Get the start and end times of the experiment. + + Returns: + Tuple of (start_time, end_time) as Unix timestamps + """ return self.begin_time, self.end_time def add_result_bucket(self, result_bucket: str): + """Set the result bucket for storing experiment results. + + Args: + result_bucket: Name of the bucket to store results in + """ self.result_bucket = result_bucket def add_invocation(self, func: Function, invocation: ExecutionResult): - # the function has most likely failed, thus no request id + """Add an invocation result for a specific function. + + If the invocation doesn't have a request ID (likely due to failure), + a synthetic ID is generated. + + Args: + func: Function the invocation belongs to + invocation: Execution result to add + """ + # The function has most likely failed, thus no request id if invocation.request_id: req_id = invocation.request_id else: req_id = f"failed-{len(self._invocations.get(func.name, []))}" + # Add to existing invocations or create new entry if func.name in self._invocations: self._invocations.get(func.name)[req_id] = invocation # type: ignore else: self._invocations[func.name] = {req_id: invocation} def functions(self) -> List[str]: + """Get a list of all function names in the results. + + Returns: + List of function names + """ return list(self._invocations.keys()) def invocations(self, func: str) -> Dict[str, ExecutionResult]: + """Get invocation results for a specific function. + + Args: + func: Name of the function to get invocation results for + + Returns: + Dictionary mapping request IDs to execution results + + Raises: + KeyError: If function name is not found in results + """ return self._invocations[func] def metrics(self, func: str) -> dict: + """Get metrics for a specific function. + + If no metrics exist for the function, an empty dictionary is created + and returned. 
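+
+        Illustrative use (function name and metric key are placeholders):
+
+            result.metrics("my-function")["memory"] = 128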
+ + Args: + func: Name of the function to get metrics for + + Returns: + Dictionary of metrics for the function + """ if func not in self._metrics: self._metrics[func] = {} return self._metrics[func] @staticmethod def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": + """Deserialize a result from a dictionary representation. + + This static method creates a new Result object from a dictionary + representation, which may have been loaded from a file or cache. + + Args: + cached_config: Dictionary representation of the result + cache: Cache instance for resolving references + handlers: Logging handlers for the result + + Returns: + A new Result object with settings from the dictionary + """ invocations: Dict[str, dict] = {} for func, func_invocations in cached_config["_invocations"].items(): invocations[func] = {} diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf..91683a38 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -1,3 +1,17 @@ +""" +Function and execution model for the serverless benchmarking framework. + +This module defines the core abstractions for serverless functions, including: +- Function class: Represents a deployed serverless function +- Trigger class: Represents invocation mechanisms for functions +- Runtime and FunctionConfig: Configuration parameters for functions +- ExecutionResult and related classes: Data model for capturing measurements + +These abstractions provide a unified interface for handling functions across +different FaaS platforms, allowing for consistent deployment, invocation, +and measurement collection. +""" + from __future__ import annotations import json @@ -12,12 +26,24 @@ from sebs.benchmark import Benchmark from sebs.utils import LoggingBase -""" - Times are reported in microseconds. -""" - class ExecutionTimes: + """ + Client-side timing measurements for function execution. + + Stores various timing measurements from the client's perspective, + including total execution time, HTTP connection times, and benchmark + runtime. + + Attributes: + client: Total client-side execution time in microseconds + client_begin: Timestamp when the request was initiated + client_end: Timestamp when the response was received + benchmark: Benchmark execution time in microseconds + initialization: Function initialization time in microseconds + http_startup: Time to establish HTTP connection in seconds + http_first_byte_return: Time to first byte in seconds + """ client: int client_begin: datetime @@ -28,94 +54,217 @@ class ExecutionTimes: http_first_byte_return: int def __init__(self): + """Initialize with default values.""" self.client = 0 self.initialization = 0 self.benchmark = 0 @staticmethod def deserialize(cached_obj: dict) -> "ExecutionTimes": + """ + Create an ExecutionTimes instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized timing data + + Returns: + ExecutionTimes: New instance with the deserialized data + """ ret = ExecutionTimes() ret.__dict__.update(cached_obj) return ret class ProviderTimes: + """ + Provider-reported timing measurements for function execution. + + Stores timing measurements reported by the cloud provider, + including initialization time and execution time. 
+ + Attributes: + initialization: Function initialization time in microseconds + execution: Function execution time in microseconds + """ initialization: int execution: int def __init__(self): + """Initialize with default values.""" self.execution = 0 self.initialization = 0 @staticmethod def deserialize(cached_obj: dict) -> "ProviderTimes": + """ + Create a ProviderTimes instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized timing data + + Returns: + ProviderTimes: New instance with the deserialized data + """ ret = ProviderTimes() ret.__dict__.update(cached_obj) return ret class ExecutionStats: + """ + Statistics for function execution. + + Tracks execution statistics such as memory usage, cold start status, + and execution failure. + + Attributes: + memory_used: Amount of memory used in MB (if available) + cold_start: Whether this was a cold start execution + failure: Whether the execution failed + """ memory_used: Optional[float] cold_start: bool failure: bool def __init__(self): + """Initialize with default values.""" self.memory_used = None self.cold_start = False self.failure = False @staticmethod def deserialize(cached_obj: dict) -> "ExecutionStats": + """ + Create an ExecutionStats instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized statistics + + Returns: + ExecutionStats: New instance with the deserialized data + """ ret = ExecutionStats() ret.__dict__.update(cached_obj) return ret class ExecutionBilling: + """ + Billing information for function execution. + + Tracks billing-related metrics such as allocated memory, + billed execution time, and GB-seconds consumed. + + Attributes: + memory: Allocated memory in MB + billed_time: Billed execution time in milliseconds + gb_seconds: GB-seconds consumed (memory/1024 * billed_time/1000) + """ _memory: Optional[int] _billed_time: Optional[int] _gb_seconds: int def __init__(self): + """Initialize with default values.""" self.memory = None self.billed_time = None self.gb_seconds = 0 @property def memory(self) -> Optional[int]: + """ + Get the allocated memory in MB. + + Returns: + int: Memory allocation in MB, or None if not available + """ return self._memory @memory.setter def memory(self, val: int): + """ + Set the allocated memory in MB. + + Args: + val: Memory allocation in MB + """ self._memory = val @property def billed_time(self) -> Optional[int]: + """ + Get the billed execution time in milliseconds. + + Returns: + int: Billed time in milliseconds, or None if not available + """ return self._billed_time @billed_time.setter def billed_time(self, val: int): + """ + Set the billed execution time in milliseconds. + + Args: + val: Billed time in milliseconds + """ self._billed_time = val @property def gb_seconds(self) -> int: + """ + Get the GB-seconds consumed. + + Returns: + int: GB-seconds consumed + """ return self._gb_seconds @gb_seconds.setter def gb_seconds(self, val: int): + """ + Set the GB-seconds consumed. + + Args: + val: GB-seconds consumed + """ self._gb_seconds = val @staticmethod def deserialize(cached_obj: dict) -> "ExecutionBilling": + """ + Create an ExecutionBilling instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized billing data + + Returns: + ExecutionBilling: New instance with the deserialized data + """ ret = ExecutionBilling() ret.__dict__.update(cached_obj) return ret class ExecutionResult: + """ + Comprehensive result of a function execution. 
+ + This class captures all timing information, provider metrics, and function + output from a single function invocation. It provides methods for parsing + benchmark output and calculating metrics. + + Attributes: + output: Dictionary containing function output + request_id: Unique identifier for the request + times: ExecutionTimes containing client-side timing measurements + provider_times: ProviderTimes containing platform-reported timings + stats: ExecutionStats containing resource usage statistics + billing: ExecutionBilling containing cost-related information + """ output: dict request_id: str @@ -125,6 +274,7 @@ class ExecutionResult: billing: ExecutionBilling def __init__(self): + """Initialize with default values for all components.""" self.output = {} self.request_id = "" self.times = ExecutionTimes() @@ -134,6 +284,16 @@ def __init__(self): @staticmethod def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": + """ + Create an ExecutionResult with client-side timing information. + + Args: + client_time_begin: Timestamp when the request was initiated + client_time_end: Timestamp when the response was received + + Returns: + ExecutionResult: New instance with calculated client-side timing + """ ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end @@ -141,6 +301,17 @@ def from_times(client_time_begin: datetime, client_time_end: datetime) -> "Execu return ret def parse_benchmark_output(self, output: dict): + """ + Parse the output from a benchmark execution. + + Extracts timing information and cold start status from the benchmark output. + + Args: + output: Dictionary containing benchmark output + + Raises: + RuntimeError: If the invocation failed (missing required fields) + """ self.output = output # FIXME: temporary handling of errorenous invocation if "is_cold" not in self.output: @@ -156,6 +327,15 @@ def parse_benchmark_output(self, output: dict): @staticmethod def deserialize(cached_config: dict) -> "ExecutionResult": + """ + Create an ExecutionResult instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized execution result + + Returns: + ExecutionResult: New instance with the deserialized data + """ ret = ExecutionResult() ret.times = ExecutionTimes.deserialize(cached_config["times"]) ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) @@ -166,28 +346,64 @@ def deserialize(cached_config: dict) -> "ExecutionResult": return ret -""" - Function trigger and implementation of invocation. - - FIXME: implement a generic HTTP invocation and specialize input and output - processing in classes. -""" - - class Trigger(ABC, LoggingBase): + """ + Abstract base class for function triggers. + + A trigger represents a mechanism for invoking a serverless function, + such as HTTP requests, direct SDK invocations, or event-based triggers. + Each trigger type implements synchronous and asynchronous invocation methods. + """ class TriggerType(Enum): + """ + Enumeration of supported trigger types. + + Defines the different mechanisms for invoking serverless functions: + - HTTP: Invocation via HTTP requests + - LIBRARY: Invocation via cloud provider SDK + - STORAGE: Invocation via storage events + """ HTTP = "http" LIBRARY = "library" STORAGE = "storage" @staticmethod def get(name: str) -> "Trigger.TriggerType": + """ + Get a TriggerType by name (case-insensitive). 
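+
+            Example (illustrative):
+
+                Trigger.TriggerType.get("http")  # -> Trigger.TriggerType.HTTP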
+ + Args: + name: Name of the trigger type + + Returns: + TriggerType: The matching trigger type + + Raises: + Exception: If no matching trigger type is found + """ for member in Trigger.TriggerType: if member.value.lower() == name.lower(): return member - raise Exception("Unknown trigger type {}".format(member)) + raise Exception("Unknown trigger type {}".format(name)) def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: + """ + Invoke a function via HTTP request. + + Makes a HTTP POST request to the given URL with the provided payload + and processes the response into an ExecutionResult. + + Args: + payload: Dictionary containing the function input + url: URL to invoke the function + verify_ssl: Whether to verify SSL certificates + + Returns: + ExecutionResult: Result of the function execution + + Raises: + RuntimeError: If the invocation fails or produces invalid output + """ import pycurl from io import BytesIO @@ -236,69 +452,170 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec self.logging.error("No output provided!") raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") - # FIXME: 3.7+, future annotations @staticmethod @abstractmethod def trigger_type() -> "Trigger.TriggerType": + """ + Get the type of this trigger. + + Returns: + TriggerType: The type of this trigger + """ pass @abstractmethod def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke a function with the given payload. + + Args: + payload: Dictionary containing the function input + + Returns: + ExecutionResult: Result of the function execution + """ pass @abstractmethod def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke a function with the given payload. + + Args: + payload: Dictionary containing the function input + + Returns: + Future: Future object representing the pending execution + """ pass @abstractmethod def serialize(self) -> dict: + """ + Serialize the trigger to a dictionary. + + Returns: + dict: Dictionary representation of the trigger + """ pass @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Trigger": + """ + Create a Trigger instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized trigger + + Returns: + Trigger: New instance with the deserialized data + """ pass class Language(Enum): + """ + Enumeration of supported programming languages. + + Currently supports Python and Node.js for serverless functions. + """ PYTHON = "python" NODEJS = "nodejs" - # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(val: str) -> Language: + """ + Get a Language by string value. + + Args: + val: String representation of the language + + Returns: + Language: The matching language enum + + Raises: + Exception: If no matching language is found + """ for member in Language: if member.value == val: return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): + """ + Enumeration of supported CPU architectures. + + Defines the CPU architectures that can be targeted for function deployment. + """ X86 = "x64" ARM = "arm64" def serialize(self) -> str: + """ + Serialize the architecture to a string. 
+ + Returns: + str: String representation of the architecture + """ return self.value @staticmethod def deserialize(val: str) -> Architecture: + """ + Get an Architecture by string value. + + Args: + val: String representation of the architecture + + Returns: + Architecture: The matching architecture enum + + Raises: + Exception: If no matching architecture is found + """ for member in Architecture: if member.value == val: return member - raise Exception(f"Unknown architecture type {member}") + raise Exception(f"Unknown architecture type {val}") @dataclass class Runtime: + """ + Runtime configuration for a serverless function. + + Defines the language and version for a function's runtime environment. + + Attributes: + language: Programming language (Python, Node.js) + version: Version string of the language runtime + """ language: Language version: str def serialize(self) -> dict: + """ + Serialize the runtime to a dictionary. + + Returns: + dict: Dictionary representation of the runtime + """ return {"language": self.language.value, "version": self.version} @staticmethod def deserialize(config: dict) -> Runtime: + """ + Create a Runtime instance from a dictionary. + + Args: + config: Dictionary containing serialized runtime + + Returns: + Runtime: New instance with the deserialized data + """ languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} return Runtime(language=languages[config["language"]], version=config["version"]) @@ -308,6 +625,17 @@ def deserialize(config: dict) -> Runtime: @dataclass class FunctionConfig: + """ + Configuration for a serverless function. + + Defines the resources, runtime, and architecture for a function deployment. + + Attributes: + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + runtime: Runtime environment configuration + architecture: CPU architecture for deployment + """ timeout: int memory: int runtime: Runtime @@ -315,6 +643,16 @@ class FunctionConfig: @staticmethod def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: + """ + Create a FunctionConfig subclass instance from a benchmark. + + Args: + benchmark: Benchmark to extract configuration from + obj_type: Type of FunctionConfig to create + + Returns: + T: New instance of the specified FunctionConfig subclass + """ runtime = Runtime(language=benchmark.language, version=benchmark.language_version) architecture = Architecture.deserialize(benchmark._experiment_config._architecture) cfg = obj_type( @@ -327,28 +665,72 @@ def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: @staticmethod def from_benchmark(benchmark: Benchmark) -> FunctionConfig: + """ + Create a FunctionConfig instance from a benchmark. + + Args: + benchmark: Benchmark to extract configuration from + + Returns: + FunctionConfig: New instance with the benchmark's configuration + """ return FunctionConfig._from_benchmark(benchmark, FunctionConfig) @staticmethod def deserialize(data: dict) -> FunctionConfig: + """ + Create a FunctionConfig instance from a dictionary. + + Args: + data: Dictionary containing serialized function configuration + + Returns: + FunctionConfig: New instance with the deserialized data + """ keys = list(FunctionConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} data["runtime"] = Runtime.deserialize(data["runtime"]) return FunctionConfig(**data) def serialize(self) -> dict: + """ + Serialize the function configuration to a dictionary. 
+ + Returns: + dict: Dictionary representation of the function configuration + """ return self.__dict__ -""" - Abstraction base class for FaaS function. Contains a list of associated triggers - and might implement non-trigger execution if supported by the SDK. - Example: direct function invocation through AWS boto3 SDK. -""" - - class Function(LoggingBase): + """ + Abstract base class for serverless functions. + + This class represents a deployed serverless function with its configuration + and associated triggers. It provides a unified interface for managing function + deployments across different cloud providers. + + Each cloud provider (AWS, Azure, GCP, etc.) implements a subclass with + platform-specific functionality. + + Attributes: + config: Function configuration + name: Name of the deployed function + benchmark: Name of the benchmark implemented by this function + code_package_hash: Hash of the deployed code package + updated_code: Whether the code has been updated since deployment + """ + def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): + """ + Initialize a Function instance. + + Args: + benchmark: Name of the benchmark + name: Name of the function + code_hash: Hash of the code package + cfg: Function configuration + """ super().__init__() self._benchmark = benchmark self._name = name @@ -359,48 +741,117 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi @property def config(self) -> FunctionConfig: + """ + Get the function configuration. + + Returns: + FunctionConfig: Configuration of the function + """ return self._cfg @property - def name(self): + def name(self) -> str: + """ + Get the name of the function. + + Returns: + str: Name of the function + """ return self._name @property - def benchmark(self): + def benchmark(self) -> str: + """ + Get the name of the benchmark. + + Returns: + str: Name of the benchmark + """ return self._benchmark @property - def code_package_hash(self): + def code_package_hash(self) -> str: + """ + Get the hash of the code package. + + Returns: + str: Hash of the code package + """ return self._code_package_hash @code_package_hash.setter def code_package_hash(self, new_hash: str): + """ + Set the hash of the code package. + + Args: + new_hash: New hash of the code package + """ self._code_package_hash = new_hash @property def updated_code(self) -> bool: + """ + Check if the code has been updated since deployment. + + Returns: + bool: True if the code has been updated, False otherwise + """ return self._updated_code @updated_code.setter def updated_code(self, val: bool): + """ + Set whether the code has been updated since deployment. + + Args: + val: True if the code has been updated, False otherwise + """ self._updated_code = val def triggers_all(self) -> List[Trigger]: + """ + Get all triggers associated with this function. + + Returns: + List[Trigger]: List of all triggers + """ return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: + """ + Get triggers of a specific type associated with this function. + + Args: + trigger_type: Type of triggers to get + + Returns: + List[Trigger]: List of triggers of the specified type + """ try: return self._triggers[trigger_type] except KeyError: return [] def add_trigger(self, trigger: Trigger): + """ + Add a trigger to this function. 
+ + Args: + trigger: Trigger to add + """ if trigger.trigger_type() not in self._triggers: self._triggers[trigger.trigger_type()] = [trigger] else: self._triggers[trigger.trigger_type()].append(trigger) def serialize(self) -> dict: + """ + Serialize the function to a dictionary. + + Returns: + dict: Dictionary representation of the function + """ return { "name": self._name, "hash": self._code_package_hash, @@ -414,4 +865,13 @@ def serialize(self) -> dict: @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Function": + """ + Create a Function instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized function + + Returns: + Function: New instance with the deserialized data + """ pass diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 16f9ab11..2be96fbf 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -1,3 +1,12 @@ +""" +Module for NoSQL database storage abstraction in the Serverless Benchmarking Suite. + +This module provides an abstract base class for NoSQL database implementations +across different cloud platforms (AWS DynamoDB, Azure CosmosDB, Google Cloud Datastore) +and local development environments. It handles table creation, data writing, and +cache management for benchmark data stored in NoSQL databases. +""" + from abc import ABC from abc import abstractmethod from typing import Dict, Optional, Tuple @@ -8,20 +17,59 @@ class NoSQLStorage(ABC, LoggingBase): + """ + Abstract base class for NoSQL database storage implementations. + + This class defines the interface for NoSQL database operations across different + cloud platforms and local environments. Concrete implementations handle the + platform-specific details of creating tables, writing data, and managing + resources. + + Attributes: + cache_client: Client for caching database information + region: Cloud region where the database is deployed + """ + @staticmethod @abstractmethod def deployment_name() -> str: + """ + Get the name of the deployment platform. + + Returns: + str: Name of the deployment platform (e.g., 'aws', 'azure', 'gcp') + """ pass @property def cache_client(self) -> Cache: + """ + Get the cache client. + + Returns: + Cache: The cache client for database information + """ return self._cache_client @property - def region(self): + def region(self) -> str: + """ + Get the cloud region. + + Returns: + str: The cloud region where the database is deployed + """ return self._region def __init__(self, region: str, cache_client: Cache, resources: Resources): + """ + Initialize a NoSQL storage instance. + + Args: + region: Cloud region where the database is deployed + cache_client: Client for caching database information + resources: Resource configuration for the database + """ super().__init__() self._cache_client = cache_client self._cached = False @@ -30,40 +78,92 @@ def __init__(self, region: str, cache_client: Cache, resources: Resources): @abstractmethod def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get all tables associated with a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Dictionary mapping table logical names to physical table names + """ pass @abstractmethod def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the physical table name for a benchmark's logical table. 
+ + Args: + benchmark: Name of the benchmark + table: Logical name of the table + + Returns: + Optional[str]: Physical table name if it exists, None otherwise + """ pass @abstractmethod def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve cached table information for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + bool: True if cache was successfully retrieved, False otherwise + """ pass @abstractmethod def update_cache(self, benchmark: str): + """ + Update the cache with the latest table information for a benchmark. + + Args: + benchmark: Name of the benchmark + """ pass def envs(self) -> dict: + """ + Get environment variables required for connecting to the NoSQL storage. + + Returns: + dict: Dictionary of environment variables + """ return {} """ - Each table name follow this pattern: - sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} - - Each implementation should do the following - (1) Retrieve cached data - (2) Create missing table that do not exist - (3) Update cached data if anything new was created -> this is done separately - in benchmark.py once the data is uploaded by the benchmark. + Table naming convention and implementation requirements. + + Each table name follows this pattern: + sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} + + Each implementation should do the following: + 1. Retrieve cached data + 2. Create missing tables that do not exist + 3. Update cached data if anything new was created (done separately + in benchmark.py once the data is uploaded by the benchmark) """ def create_benchmark_tables( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ): - + """ + Create a table for a benchmark if it doesn't exist in the cache. + + Checks if the table already exists in the cache. If not, creates a new table + with the specified keys. + + Args: + benchmark: Name of the benchmark + name: Logical name of the table + primary_key: Primary key field name + secondary_key: Optional secondary key field name + """ if self.retrieve_cache(benchmark): - table_name = self._get_table_name(benchmark, name) if table_name is not None: self.logging.info( @@ -72,20 +172,32 @@ def create_benchmark_tables( return self.logging.info(f"Preparing to create a NoSQL table {name} for benchmark {benchmark}") - self.create_table(benchmark, name, primary_key, secondary_key) """ - - AWS: DynamoDB Table - Azure: CosmosDB Container - Google Cloud: Firestore in Datastore Mode, Database + Platform-specific table implementations: + + - AWS: DynamoDB Table + - Azure: CosmosDB Container + - Google Cloud: Firestore in Datastore Mode, Database """ @abstractmethod def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """ + Create a new table for a benchmark. + + Args: + benchmark: Name of the benchmark + name: Logical name of the table + primary_key: Primary key field name + secondary_key: Optional secondary key field name + + Returns: + str: Physical name of the created table + """ pass @abstractmethod @@ -97,22 +209,48 @@ def write_to_table( primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, ): + """ + Write data to a table. 
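+
+        A minimal illustrative call (names and values are placeholders):
+
+            storage.write_to_table(
+                benchmark="some-benchmark",
+                table="users",
+                data={"price": 10},
+                primary_key=("username", "alice"),
+            )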
+ + Args: + benchmark: Name of the benchmark + table: Logical name of the table + data: Dictionary of data to write + primary_key: Tuple of (key_name, key_value) for the primary key + secondary_key: Optional tuple of (key_name, key_value) for the secondary key + """ pass """ - - AWS DynamoDB: Removing & recreating table is the cheapest & fastest option - - Azure CosmosDB: recreate container - - Google Cloud: also likely recreate - + Table management operations: + + - AWS DynamoDB: Removing & recreating table is the cheapest & fastest option + - Azure CosmosDB: Recreate container + - Google Cloud: Also likely recreate """ @abstractmethod def clear_table(self, name: str) -> str: + """ + Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + str: Result message or status + """ pass @abstractmethod def remove_table(self, name: str) -> str: + """ + Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + str: Result message or status + """ pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 92eb1445..ebc421eb 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -204,7 +204,6 @@ def benchmark_data( return self.input_prefixes, self.output_prefixes - def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: bucket = self._cloud_resources.get_storage_bucket(bucket_type) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 9fbe0e27..270f7433 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -1,3 +1,13 @@ +""" +Module providing the core abstraction for Function-as-a-Service (FaaS) systems. + +This module defines the base System class that provides consistent interfaces for +working with different serverless platforms (AWS Lambda, Azure Functions, Google Cloud +Functions, OpenWhisk, etc.). It handles function lifecycle management, code packaging, +deployment, triggering, and metrics collection while abstracting away platform-specific +details. +""" + from abc import ABC from abc import abstractmethod from random import randrange @@ -15,16 +25,32 @@ from sebs.utils import LoggingBase from .config import Config -""" - This class provides basic abstractions for the FaaS system. - It provides the interface for initialization of the system and storage - services, creation and update of serverless functions and querying - logging and measurements services to obtain error messages and performance - measurements. -""" - class System(ABC, LoggingBase): + """ + Abstract base class for FaaS system implementations. + + This class provides basic abstractions for all supported FaaS platforms. + It defines the interface for system initialization, resource management, + function deployment, code packaging, function invocation, and metrics collection. + Each cloud provider implements a concrete subclass of this abstract base. 
+ + The class handles: + - System and storage service initialization + - Creation and updating of serverless functions + - Function code packaging and deployment + - Trigger creation and management + - Metrics collection and error handling + - Caching of functions to avoid redundant deployments + - Cold start management + + Attributes: + system_config: Global SeBS configuration + docker_client: Docker client for building code packages and containers + cache_client: Cache client for storing function and deployment information + cold_start_counter: Counter for generating unique function names to force cold starts + system_resources: Resources manager for the specific cloud platform + """ def __init__( self, system_config: SeBSConfig, @@ -32,60 +58,133 @@ def __init__( docker_client: docker.client, system_resources: SystemResources, ): + """ + Initialize a FaaS system implementation. + + Args: + system_config: Global SeBS configuration settings + cache_client: Cache client for storing function and deployment information + docker_client: Docker client for building code packages and containers + system_resources: Resources manager for the specific cloud platform + """ super().__init__() self._system_config = system_config self._docker_client = docker_client self._cache_client = cache_client + # Initialize with random value to help with cold start detection/forcing self._cold_start_counter = randrange(100) - self._system_resources = system_resources @property def system_config(self) -> SeBSConfig: + """ + Get the global SeBS configuration. + + Returns: + SeBSConfig: The system configuration + """ return self._system_config @property def docker_client(self) -> docker.client: + """ + Get the Docker client. + + Returns: + docker.client: The Docker client + """ return self._docker_client @property def cache_client(self) -> Cache: + """ + Get the cache client. + + Returns: + Cache: The cache client + """ return self._cache_client @property def cold_start_counter(self) -> int: + """ + Get the cold start counter. + + This counter is used in function name generation to help force cold starts + by creating new function instances with different names. + + Returns: + int: The current cold start counter value + """ return self._cold_start_counter @cold_start_counter.setter def cold_start_counter(self, val: int): + """ + Set the cold start counter. + + Args: + val: The new counter value + """ self._cold_start_counter = val @property @abstractmethod def config(self) -> Config: + """ + Get the platform-specific configuration. + + Returns: + Config: The platform-specific configuration + """ pass @property def system_resources(self) -> SystemResources: + """ + Get the platform-specific resources manager. + + Returns: + SystemResources: The resources manager + """ return self._system_resources @staticmethod @abstractmethod def function_type() -> "Type[Function]": + """ + Get the platform-specific Function class type. + + Returns: + Type[Function]: The Function class for this platform + """ pass def find_deployments(self) -> List[str]: - """ - Default implementation that uses storage buckets. - data storage accounts. - This can be overriden, e.g., in Azure that looks for unique + Find existing deployments in the cloud platform. + + Default implementation uses storage buckets to identify deployments. + This can be overridden by platform-specific implementations, e.g., + Azure that looks for unique storage accounts. 
+ + Returns: + List[str]: List of existing deployment resource IDs """ - return self.system_resources.get_storage().find_deployments() def initialize_resources(self, select_prefix: Optional[str]): - + """ + Initialize cloud resources for the deployment. + + This method either: + 1. Uses an existing resource ID from configuration + 2. Finds and reuses an existing deployment matching the prefix + 3. Creates a new unique resource ID and initializes resources + + Args: + select_prefix: Optional prefix to match when looking for existing deployments + """ # User provided resources or found in cache if self.config.resources.has_resources_id: self.logging.info( @@ -98,7 +197,6 @@ def initialize_resources(self, select_prefix: Optional[str]): # If a prefix is specified, we find the first matching resource ID if select_prefix is not None: - for dep in deployments: if select_prefix in dep: self.logging.info( @@ -117,7 +215,7 @@ def initialize_resources(self, select_prefix: Optional[str]): ) self.logging.warning("Deployment resource IDs in the cloud: " f"{deployments}") - # create + # Create a new unique resource ID res_id = "" if select_prefix is not None: res_id = f"{select_prefix}-{str(uuid.uuid1())[0:8]}" @@ -125,31 +223,46 @@ def initialize_resources(self, select_prefix: Optional[str]): res_id = str(uuid.uuid1())[0:8] self.config.resources.resources_id = res_id self.logging.info(f"Generating unique resource name {res_id}") - # ensure that the bucket is created - this allocates the new resource + + # Ensure that the bucket is created - this allocates the new resource self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. - - :param config: systems-specific parameters - """ - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize the system. + + After this call completes, the local or remote FaaS system should be ready + to allocate functions, manage storage resources, and invoke functions. + + Args: + config: System-specific parameters + resource_prefix: Optional prefix for resource naming + """ pass - """ - Apply the system-specific code packaging routine to build benchmark. + @abstractmethod + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + architecture: str, + benchmark: str, + is_cached: bool, + container_deployment: bool, + ) -> Tuple[str, int, str]: + """ + Apply system-specific code packaging to prepare a deployment package. + The benchmark creates a code directory with the following structure: - [benchmark sources] - [benchmark resources] - [dependence specification], e.g. requirements.txt or package.json - [handlers implementation for the language and deployment] - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific - + This step transforms that structure to fit platform-specific deployment + requirements, such as creating a zip file for AWS or container image. 
+ Args: directory: Path to the code directory language_name: Programming language name @@ -162,21 +275,9 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] Returns: Tuple containing: - Path to packaged code - - Size of the package - - Container URI - """ - - @abstractmethod - def package_code( - self, - directory: str, - language_name: str, - language_version: str, - architecture: str, - benchmark: str, - is_cached: bool, - container_deployment: bool, - ) -> Tuple[str, int, str]: + - Size of the package in bytes + - Container URI (if container deployment, otherwise empty string) + """ pass @abstractmethod @@ -209,6 +310,16 @@ def create_function( @abstractmethod def cached_function(self, function: Function): + """ + Perform any necessary operations for a cached function. + + This method is called when a function is found in the cache. It may perform + platform-specific operations such as checking if the function still exists + in the cloud, updating permissions, etc. + + Args: + function: The cached function instance + """ pass @abstractmethod @@ -233,21 +344,29 @@ def update_function( """ pass - """ - a) if a cached function with given name is present and code has not changed, - then just return function name - b) if a cached function is present and the cloud code has a different - code version, then upload new code - c) if no cached function is present, then create code package and - either create new function or update an existing but uncached one - - Benchmark rebuild is requested but will be skipped if source code is - not changed and user didn't request update. - - """ - def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: - + """ + Get or create a function for a benchmark. + + This method handles the complete function creation/update workflow: + + 1. If a cached function with the given name exists and code has not changed, + returns the existing function + 2. If a cached function exists but the code has changed, updates the + function with the new code + 3. If no cached function exists, creates a new function + + Args: + code_package: The benchmark containing the function code + func_name: Optional name for the function (will be generated if not provided) + + Returns: + Function: The function instance + + Raises: + Exception: If the language version is not supported by this platform + """ + # Verify language version compatibility if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name, code_package.architecture ): @@ -259,35 +378,32 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) ) + # Generate function name if not provided if not func_name: func_name = self.default_function_name(code_package) + + # Build the code package rebuilt, _, container_deployment, container_uri = code_package.build(self.package_code) - """ - There's no function with that name? - a) yes -> create new function. Implementation might check if a function - with that name already exists in the cloud and update its code. - b) no -> retrieve function from the cache. Function code in cloud will - be updated if the local version is different. 
- """ + # Check if function exists in cache functions = code_package.functions - is_function_cached = not (not functions or func_name not in functions) + if is_function_cached: - # retrieve function + # Retrieve function from cache cached_function = functions[func_name] code_location = code_package.code_location try: function = self.function_type().deserialize(cached_function) except RuntimeError as e: - self.logging.error( f"Cached function {cached_function['name']} is no longer available." ) self.logging.error(e) is_function_cached = False + # Create new function if not cached or deserialize failed if not is_function_cached: msg = ( "function name not provided." @@ -307,13 +423,14 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) code_package.query_cache() return function else: - + # Handle existing function assert function is not None self.cached_function(function) self.logging.info( "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) ) - # is the function up-to-date? + + # Check if code needs to be updated if function.code_package_hash != code_package.hash or rebuilt: if function.code_package_hash != code_package.hash: self.logging.info( @@ -324,9 +441,11 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) if rebuilt: self.logging.info( - f"Enforcing rebuild and update of of cached function " + f"Enforcing rebuild and update of cached function " f"{func_name} with hash {function.code_package_hash}." ) + + # Update function code self.update_function(function, code_package, container_deployment, container_uri) function.code_package_hash = code_package.hash function.updated_code = True @@ -337,28 +456,48 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) function=function, ) code_package.query_cache() - # code up to date, but configuration needs to be updated - # FIXME: detect change in function config + + # Check if configuration needs to be updated elif self.is_configuration_changed(function, code_package): self.update_function_configuration(function, code_package) self.cache_client.update_function(function) code_package.query_cache() else: self.logging.info(f"Cached function {func_name} is up to date.") + return function @abstractmethod def update_function_configuration(self, cached_function: Function, benchmark: Benchmark): + """ + Update the configuration of an existing function. + + This method is called when a function's code is up-to-date but its + configuration (memory, timeout, etc.) needs to be updated. + + Args: + cached_function: The function to update + benchmark: The benchmark containing the new configuration + """ pass - """ - This function checks for common function parameters to verify if their value is - still up to date. - """ - def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: - + """ + Check if a function's configuration needs to be updated. + + This function checks for common function parameters to verify if their + values are still up to date with the benchmark configuration. 
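+
+        Example:
+            Mirrors how get_function applies this check:
+
+                if self.is_configuration_changed(function, code_package):
+                    self.update_function_configuration(function, code_package)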
+ + Args: + cached_function: The existing function + benchmark: The benchmark with potential new configuration + + Returns: + bool: True if configuration has changed, False otherwise + """ changed = False + + # Check common configuration attributes for attr in ["timeout", "memory"]: new_val = getattr(benchmark.benchmark_config, attr) old_val = getattr(cached_function.config, attr) @@ -370,6 +509,7 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma changed = True setattr(cached_function.config, attr, new_val) + # Check language/runtime attributes for lang_attr in [["language"] * 2, ["language_version", "version"]]: new_val = getattr(benchmark, lang_attr[0]) old_val = getattr(cached_function.config.runtime, lang_attr[1]) @@ -389,10 +529,30 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate a default function name for a benchmark. + + Args: + code_package: The benchmark to generate a name for + resources: Optional resources configuration + + Returns: + str: Generated function name + """ pass @abstractmethod def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Force cold starts for the specified functions. + + This method implements platform-specific techniques to ensure that + subsequent invocations of the functions will be cold starts. + + Args: + functions: List of functions to enforce cold starts for + code_package: The benchmark associated with the functions + """ pass @abstractmethod @@ -404,26 +564,48 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: dict, ): + """ + Download function metrics from the cloud platform. + + Args: + function_name: Name of the function to get metrics for + start_time: Start timestamp for metrics collection + end_time: End timestamp for metrics collection + requests: Dictionary of execution results + metrics: Dictionary to store the downloaded metrics + """ pass @abstractmethod def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for a function. + + Args: + function: The function to create a trigger for + trigger_type: Type of trigger to create + + Returns: + Trigger: The created trigger + """ pass def disable_rich_output(self): + """ + Disable rich output for platforms that support it. + + This is mostly used in testing environments or CI pipelines. + """ pass - # @abstractmethod - # def get_invocation_error(self, function_name: str, - # start_time: int, end_time: int): - # pass - - """ - Shutdown local FaaS instances, connections and clients. - """ - @abstractmethod def shutdown(self) -> None: + """ + Shutdown the FaaS system. + + Closes connections, stops local instances, and updates the cache. + This should be called when the system is no longer needed. + """ try: self.cache_client.lock() self.config.update_cache(self.cache_client) @@ -433,4 +615,10 @@ def shutdown(self) -> None: @staticmethod @abstractmethod def name() -> str: + """ + Get the name of the platform. + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp') + """ pass diff --git a/sebs/regression.py b/sebs/regression.py index 579760a1..7282482f 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -1,3 +1,21 @@ +"""Regression testing framework for serverless benchmarks across cloud providers. 
+ +This module provides a flexible testing framework to validate benchmark functionality +across multiple cloud providers, runtimes, architectures, and deployment methods. +It automatically generates test cases for each valid combination and runs them +concurrently to efficiently validate the system. + +The module supports: +- AWS Lambda +- Azure Functions +- Google Cloud Functions +- OpenWhisk +- Multiple runtime languages (Python, Node.js) +- Multiple architectures (x64, arm64) +- Different deployment types (package, container) +- Different trigger types (HTTP, library) +""" + import logging import os import unittest @@ -13,38 +31,56 @@ if TYPE_CHECKING: from sebs import SeBS +# List of Python benchmarks available for regression testing benchmarks_python = [ - "110.dynamic-html", - "120.uploader", - "130.crud-api", - "210.thumbnailer", - "220.video-processing", - "311.compression", - "411.image-recognition", - "501.graph-pagerank", - "502.graph-mst", - "503.graph-bfs", - "504.dna-visualisation", + "110.dynamic-html", # Dynamic HTML generation + "120.uploader", # File upload handling + "130.crud-api", # CRUD API implementation + "210.thumbnailer", # Image thumbnail generation + "220.video-processing", # Video processing + "311.compression", # Data compression + "411.image-recognition", # ML-based image recognition + "501.graph-pagerank", # Graph PageRank algorithm + "502.graph-mst", # Graph minimum spanning tree + "503.graph-bfs", # Graph breadth-first search + "504.dna-visualisation", # DNA visualization ] + +# List of Node.js benchmarks available for regression testing benchmarks_nodejs = ["110.dynamic-html", "120.uploader", "210.thumbnailer"] -architectures_aws = ["x64", "arm64"] -deployments_aws = ["package", "container"] +# AWS-specific configurations +architectures_aws = ["x64", "arm64"] # Supported architectures +deployments_aws = ["package", "container"] # Deployment types -architectures_gcp = ["x64"] -deployments_gcp = ["package"] +# GCP-specific configurations +architectures_gcp = ["x64"] # Supported architectures +deployments_gcp = ["package"] # Deployment types -architectures_azure = ["x64"] -deployments_azure = ["package"] +# Azure-specific configurations +architectures_azure = ["x64"] # Supported architectures +deployments_azure = ["package"] # Deployment types -architectures_openwhisk = ["x64"] -deployments_openwhisk = ["container"] +# OpenWhisk-specific configurations +architectures_openwhisk = ["x64"] # Supported architectures +deployments_openwhisk = ["container"] # Deployment types -# user-defined config passed during initialization +# User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None class TestSequenceMeta(type): + """Metaclass for dynamically generating regression test cases. + + This metaclass automatically generates test methods for all combinations of + benchmark, architecture, and deployment type. Each test method deploys and + executes a specific benchmark on a specific cloud provider with a specific + configuration. + + The generated tests follow a naming convention: + test_{provider}_{benchmark}_{architecture}_{deployment_type} + """ + def __init__( cls, name, @@ -56,6 +92,19 @@ def __init__( deployment_name, triggers, ): + """Initialize the test class with deployment information. 
+ + Args: + cls: The class being created + name: The name of the class + bases: Base classes + attrs: Class attributes + benchmarks: List of benchmark names to test + architectures: List of architectures to test (e.g., x64, arm64) + deployments: List of deployment types to test (e.g., package, container) + deployment_name: Name of the cloud provider (e.g., aws, azure) + triggers: List of trigger types to test (e.g., HTTP, library) + """ type.__init__(cls, name, bases, attrs) cls.deployment_name = deployment_name cls.triggers = triggers @@ -71,16 +120,59 @@ def __new__( deployment_name, triggers, ): + """Create a new test class with dynamically generated test methods. + + Args: + mcs: The metaclass + name: The name of the class + bases: Base classes + dict: Class attributes dictionary + benchmarks: List of benchmark names to test + architectures: List of architectures to test + deployments: List of deployment types to test + deployment_name: Name of the cloud provider + triggers: List of trigger types to test + + Returns: + A new test class with dynamically generated test methods + """ + def gen_test(benchmark_name, architecture, deployment_type): + """Generate a test function for a specific benchmark configuration. + + Args: + benchmark_name: Name of the benchmark to test + architecture: Architecture to test on + deployment_type: Deployment type to use + + Returns: + A test function that deploys and executes the benchmark + """ + def test(self): + """Test function that deploys and executes a benchmark. + + This function: + 1. Sets up logging + 2. Gets a deployment client + 3. Configures the benchmark + 4. Deploys the function + 5. Invokes the function with different triggers + 6. Verifies the function execution + + Raises: + RuntimeError: If the benchmark execution fails + """ log_name = f"Regression-{deployment_name}-{benchmark_name}-{deployment_type}" logger = logging.getLogger(log_name) logger.setLevel(logging.INFO) logging_wrapper = ColoredWrapper(log_name, logger) + # Configure experiment settings self.experiment_config["architecture"] = architecture self.experiment_config["container_deployment"] = deployment_type == "container" + # Get deployment client for the specific cloud provider deployment_client = self.get_deployment( benchmark_name, architecture, deployment_type ) @@ -91,34 +183,37 @@ def test(self): f"Architecture {architecture}, deployment type: {deployment_type}." ) + # Get experiment configuration and deploy the benchmark experiment_config = self.client.get_experiment_config(self.experiment_config) - benchmark = self.client.get_benchmark( benchmark_name, deployment_client, experiment_config ) + + # Prepare input data for the benchmark input_config = benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=experiment_config.update_storage, ) + + # Get or create the function func = deployment_client.get_function( benchmark, deployment_client.default_function_name(benchmark) ) + # Test each trigger type failure = False for trigger_type in triggers: if len(func.triggers(trigger_type)) > 0: trigger = func.triggers(trigger_type)[0] else: trigger = deployment_client.create_trigger(func, trigger_type) - """ - sleep 5 seconds - on some cloud systems the triggers might - not be available immediately. 
- for example, AWS tends to throw "not exist" on newly created - API gateway - """ + # Sleep to allow trigger creation to propagate + # Some cloud systems (e.g., AWS API Gateway) need time + # before the trigger is ready to use sleep(5) - # Synchronous invoke + + # Synchronous invoke to test function try: ret = trigger.sync_invoke(input_config) if ret.stats.failure: @@ -133,22 +228,27 @@ def test(self): except RuntimeError: failure = True logging_wrapper.error(f"{benchmark_name} fail on trigger: {trigger_type}") + + # Clean up resources deployment_client.shutdown() + + # Report overall test result if failure: raise RuntimeError(f"Test of {benchmark_name} failed!") return test + # Generate test methods for each combination for benchmark in benchmarks: for architecture in architectures: for deployment_type in deployments: - # for trigger in triggers: test_name = f"test_{deployment_name}_{benchmark}" test_name += f"_{architecture}_{deployment_type}" dict[test_name] = gen_test(benchmark, architecture, deployment_type) - dict["lock"] = threading.Lock() - dict["cfg"] = None + # Add shared resources + dict["lock"] = threading.Lock() # Lock for thread-safe initialization + dict["cfg"] = None # Shared configuration return type.__new__(mcs, name, bases, dict) @@ -161,20 +261,54 @@ class AWSTestSequencePython( deployment_name="aws", triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on AWS Lambda. + + This test class runs all Python benchmarks on AWS Lambda, + using various architectures (x64, arm64) and deployment types + (package, container). Each test uses both library and HTTP triggers. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of AWS architectures to test (x64, arm64) + deployments: List of deployment types to test (package, container) + deployment_name: Cloud provider name ("aws") + triggers: List of trigger types to test (LIBRARY, HTTP) + """ + @property def typename(self) -> str: + """Get the type name of this test suite. + + Returns: + A string identifier for this test suite + """ return "AWSTestPython" def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an AWS deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64, arm64) + deployment_type: Deployment type (package, container) + + Returns: + An initialized AWS deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "aws" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + # Synchronize resource initialization with a lock with AWSTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -189,14 +323,45 @@ class AWSTestSequenceNodejs( deployment_name="aws", triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on AWS Lambda. + + This test class runs all Node.js benchmarks on AWS Lambda, + using various architectures (x64, arm64) and deployment types + (package, container). Each test uses both library and HTTP triggers. 
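+
+    Example:
+        The metaclass generates one method per valid combination, e.g.
+        'test_aws_210.thumbnailer_x64_package'.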
+ + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of AWS architectures to test (x64, arm64) + deployments: List of deployment types to test (package, container) + deployment_name: Cloud provider name ("aws") + triggers: List of trigger types to test (LIBRARY, HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an AWS deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64, arm64) + deployment_type: Deployment type (package, container) + + Returns: + An initialized AWS deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "aws" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create a log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with AWSTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -211,10 +376,44 @@ class AzureTestSequencePython( deployment_name="azure", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on Azure Functions. + + This test class runs all Python benchmarks on Azure Functions, + using x64 architecture and package deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of Azure architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("azure") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an Azure deployment client for the specified configuration. 
+ + This method handles special Azure setup requirements, including: + - Caching deployment configuration to avoid recreating it for each test + - Initializing the Azure CLI for resource management + - Setting up system resources with proper authentication + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Azure deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "azure" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + with AzureTestSequencePython.lock: + # Cache the deployment configuration for reuse across tests if not AzureTestSequencePython.cfg: AzureTestSequencePython.cfg = self.client.get_deployment_config( cloud_config["deployment"], @@ -224,11 +423,13 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): ), ) + # Initialize Azure CLI if not already done if not hasattr(AzureTestSequencePython, "cli"): AzureTestSequencePython.cli = AzureCLI( self.client.config, self.client.docker_client ) + # Create log file name and get deployment client f = f"regression_{deployment_name}_{benchmark_name}_" f += f"{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( @@ -236,6 +437,8 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), deployment_config=AzureTestSequencePython.cfg, ) + + # Initialize CLI with login and setup resources deployment_client.system_resources.initialize_cli( cli=AzureTestSequencePython.cli, login=True ) @@ -252,28 +455,66 @@ class AzureTestSequenceNodejs( deployment_name="azure", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on Azure Functions. + + This test class runs all Node.js benchmarks on Azure Functions, + using x64 architecture and package deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of Azure architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("azure") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an Azure deployment client for the specified configuration. 
+ + This method handles special Azure setup requirements, including: + - Caching deployment configuration to avoid recreating it for each test + - Initializing the Azure CLI for resource management + - Setting up system resources + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Azure deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "azure" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + with AzureTestSequenceNodejs.lock: + # Cache the deployment configuration for reuse across tests if not AzureTestSequenceNodejs.cfg: AzureTestSequenceNodejs.cfg = self.client.get_deployment_config( cloud_config["deployment"], logging_filename=f"regression_{deployment_name}_{benchmark_name}.log", ) + # Initialize Azure CLI if not already done if not hasattr(AzureTestSequenceNodejs, "cli"): AzureTestSequenceNodejs.cli = AzureCLI( self.client.config, self.client.docker_client ) + # Create log file name and get deployment client f = f"regression_{deployment_name}_{benchmark_name}_" f += f"{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), - deployment_config=AzureTestSequencePython.cfg, + deployment_config=AzureTestSequencePython.cfg, # Note: This uses Python config ) + + # Initialize CLI and setup resources (no login needed - reuses Python session) deployment_client.system_resources.initialize_cli(cli=AzureTestSequenceNodejs.cli) deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -288,14 +529,45 @@ class GCPTestSequencePython( deployment_name="gcp", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on Google Cloud Functions. + + This test class runs all Python benchmarks on Google Cloud Functions, + using x64 architecture and package deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of GCP architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("gcp") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get a GCP deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Google Cloud Functions deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "gcp" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with GCPTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -310,14 +582,45 @@ class GCPTestSequenceNodejs( deployment_name="gcp", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on Google Cloud Functions. 
+ + This test class runs all Node.js benchmarks on Google Cloud Functions, + using x64 architecture and package deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of GCP architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("gcp") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get a GCP deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Google Cloud Functions deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "gcp" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with GCPTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -332,19 +635,54 @@ class OpenWhiskTestSequencePython( deployment_name="openwhisk", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on OpenWhisk. + + This test class runs all Python benchmarks on OpenWhisk, + using x64 architecture and container deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of OpenWhisk architectures to test (x64) + deployments: List of deployment types to test (container) + deployment_name: Cloud provider name ("openwhisk") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an OpenWhisk deployment client for the specified configuration. + + This method handles special OpenWhisk setup requirements, including + creating a modified configuration with architecture and deployment + type settings. 
+ + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (container) + + Returns: + An initialized OpenWhisk deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "openwhisk" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a copy of the config and set architecture and deployment type config_copy = cloud_config.copy() config_copy["experiments"]["architecture"] = architecture config_copy["experiments"]["container_deployment"] = deployment_type == "container" + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( config_copy, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with OpenWhiskTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -359,52 +697,119 @@ class OpenWhiskTestSequenceNodejs( deployment_name="openwhisk", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on OpenWhisk. + + This test class runs all Node.js benchmarks on OpenWhisk, + using x64 architecture and container deployment. Each test uses + HTTP triggers. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of OpenWhisk architectures to test (x64) + deployments: List of deployment types to test (container) + deployment_name: Cloud provider name ("openwhisk") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an OpenWhisk deployment client for the specified configuration. + + This method handles special OpenWhisk setup requirements, including + creating a modified configuration with architecture and deployment + type settings. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (container) + + Returns: + An initialized OpenWhisk deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "openwhisk" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a copy of the config and set architecture and deployment type config_copy = cloud_config.copy() config_copy["experiments"]["architecture"] = architecture config_copy["experiments"]["container_deployment"] = deployment_type == "container" + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( config_copy, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with OpenWhiskTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client -# https://stackoverflow.com/questions/22484805/a-simple-working-example-for-testtools-concurrentstreamtestsuite +# Stream result handler for concurrent test execution +# Based on https://stackoverflow.com/questions/22484805/a-simple-working-example-for-testtools-concurrentstreamtestsuite class TracingStreamResult(testtools.StreamResult): + """Stream result handler for concurrent test execution. + + This class captures test execution results and maintains running state + for all tests. 
It tracks successful tests, failed tests, and collects + test output for reporting. + + Attributes: + all_correct: Whether all tests have passed + output: Dictionary mapping test IDs to their output bytes + success: Set of test names that succeeded + failures: Set of test names that failed + """ + all_correct: bool output: Dict[str, bytes] = {} def __init__(self): + """Initialize a new stream result handler. + + Sets up initial state for tracking test results. + """ self.all_correct = True self.success = set() self.failures = set() - # no way to directly access test instance from here def status(self, *args, **kwargs): + """Process a test status update. + + This method is called by the test runner to report on test progress + and results. It parses test IDs, collects output, and tracks success/failure. + + Args: + *args: Variable length argument list (not used) + **kwargs: Keyword arguments including test_id, test_status, and file_bytes + """ + # Update overall test status (only inprogress and success states are considered passing) self.all_correct = self.all_correct and (kwargs["test_status"] in ["inprogress", "success"]) + # Extract benchmark, architecture, and deployment type from test ID bench, arch, deployment_type = kwargs["test_id"].split("_")[-3:None] test_name = f"{bench}, {arch}, {deployment_type}" + if not kwargs["test_status"]: + # Collect test output test_id = kwargs["test_id"] if test_id not in self.output: self.output[test_id] = b"" self.output[test_id] += kwargs["file_bytes"] elif kwargs["test_status"] == "fail": + # Handle test failure print("\n-------------\n") print("{0[test_id]}: {0[test_status]}".format(kwargs)) print("{0[test_id]}: {1}".format(kwargs, self.output[kwargs["test_id"]].decode())) print("\n-------------\n") self.failures.add(test_name) elif kwargs["test_status"] == "success": + # Track successful tests self.success.add(test_name) @@ -415,19 +820,39 @@ def filter_out_benchmarks( language_version: str, architecture: str, ) -> bool: + """Filter out benchmarks that are not supported on specific platforms. + + Some benchmarks are not compatible with certain runtime environments due + to memory constraints, unsupported libraries, or other limitations. + This function identifies those incompatible combinations. + + Args: + benchmark: The benchmark name to check + deployment_name: Cloud provider name (aws, azure, gcp, openwhisk) + language: Runtime language (python, nodejs) + language_version: Language version (e.g., "3.9", "3.10") + architecture: CPU architecture (x64, arm64) + + Returns: + bool: True if the benchmark should be included, False to filter it out + """ # fmt: off + # Filter out image recognition on newer Python versions on AWS if (deployment_name == "aws" and language == "python" and language_version in ["3.9", "3.10", "3.11"]): return "411.image-recognition" not in benchmark + # Filter out image recognition on ARM architecture on AWS if (deployment_name == "aws" and architecture == "arm64"): return "411.image-recognition" not in benchmark + # Filter out image recognition on newer Python versions on GCP if (deployment_name == "gcp" and language == "python" and language_version in ["3.8", "3.9", "3.10", "3.11", "3.12"]): return "411.image-recognition" not in benchmark # fmt: on + # All other benchmarks are supported return True @@ -438,34 +863,72 @@ def regression_suite( deployment_config: dict, benchmark_name: Optional[str] = None, ): + """Create and run a regression test suite for specified cloud providers. 
+ + This function creates a test suite with all applicable test combinations for + the selected cloud providers and runtime configuration. It then runs the tests + concurrently and reports on successes and failures. + + Args: + sebs_client: The SeBS client instance + experiment_config: Configuration dictionary for the experiment + providers: Set of cloud provider names to test + deployment_config: Configuration dictionary for deployments + benchmark_name: Optional name of a specific benchmark to test + + Returns: + bool: True if any tests failed, False if all tests succeeded + + Raises: + AssertionError: If a requested provider is not in the deployment config + """ + # Create the test suite suite = unittest.TestSuite() + + # Make cloud_config available to test classes global cloud_config cloud_config = deployment_config + # Extract runtime configuration language = experiment_config["runtime"]["language"] language_version = experiment_config["runtime"]["version"] architecture = experiment_config["architecture"] + # Add AWS tests if requested if "aws" in providers: - assert "aws" in cloud_config["deployment"] + assert ( + "aws" in cloud_config["deployment"] + ), "AWS provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequenceNodejs)) + + # Add GCP tests if requested if "gcp" in providers: - assert "gcp" in cloud_config["deployment"] + assert ( + "gcp" in cloud_config["deployment"] + ), "GCP provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequenceNodejs)) + + # Add Azure tests if requested if "azure" in providers: - assert "azure" in cloud_config["deployment"] + assert ( + "azure" in cloud_config["deployment"] + ), "Azure provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequenceNodejs)) + + # Add OpenWhisk tests if requested if "openwhisk" in providers: - assert "openwhisk" in cloud_config["deployment"] + assert ( + "openwhisk" in cloud_config["deployment"] + ), "OpenWhisk provider requested but not in deployment config" if language == "python": suite.addTest( unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequencePython) @@ -475,37 +938,44 @@ def regression_suite( unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequenceNodejs) ) + # Prepare the list of tests to run tests = [] - # mypy is confused here + # mypy is confused here about the type for case in suite: for test in case: # type: ignore - # skip + # Get the test method name test_name = cast(unittest.TestCase, test)._testMethodName - # Remove unsupported benchmarks + # Filter out unsupported benchmarks if not filter_out_benchmarks( test_name, test.deployment_name, # type: ignore - language, # type: ignore + language, language_version, architecture, # type: ignore ): print(f"Skip test {test_name} - not supported.") continue - # Use only a selected benchmark + # Filter by benchmark name if specified if not benchmark_name or (benchmark_name and benchmark_name in test_name): + # Set up test 
instance with client and config test.client = sebs_client # type: ignore test.experiment_config = experiment_config.copy() # type: ignore tests.append(test) else: print(f"Skip test {test_name}") + # Create a concurrent test suite for parallel execution concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in tests)) result = TracingStreamResult() + + # Run the tests result.startTestRun() concurrent_suite.run(result) result.stopTestRun() + + # Report results print(f"Succesfully executed {len(result.success)} out of {len(tests)} functions") for suc in result.success: print(f"- {suc}") @@ -514,9 +984,11 @@ def regression_suite( for failure in result.failures: print(f"- {failure}") + # Clean up resources if hasattr(AzureTestSequenceNodejs, "cli"): AzureTestSequenceNodejs.cli.shutdown() if hasattr(AzureTestSequencePython, "cli"): AzureTestSequencePython.cli.shutdown() + # Return True if any test failed return not result.all_correct diff --git a/sebs/sebs.py b/sebs/sebs.py index 309c0b25..907cca5b 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -1,3 +1,19 @@ +"""Main SeBS (Serverless Benchmarking Suite) client implementation. + +This module provides the main interface for the Serverless Benchmarking Suite, +offering a unified API for deploying, executing, and benchmarking serverless +functions across multiple cloud providers and locally. It manages: + +- Deployment client creation for different platforms (AWS, Azure, GCP, OpenWhisk, local) +- Benchmark execution and configuration +- Experiment setup and execution +- Storage access (object storage and NoSQL) +- Caching and Docker management +- Logging and output handling + +The SeBS client is the central point of interaction for both the CLI and programmatic use. +""" + import os from typing import Optional, Dict, Type @@ -20,31 +36,88 @@ class SeBS(LoggingBase): + """Main client for the Serverless Benchmarking Suite. + + This class provides the primary interface for interacting with the benchmarking + suite. It manages deployment clients, benchmarks, experiments, and resources. + It handles caching, logging, and provides factory methods for creating the + various components needed for benchmarking. + + Attributes: + cache_client: Client for managing cached artifacts (code packages, etc.) + docker_client: Docker client for container operations + output_dir: Directory for storing output files and logs + verbose: Whether to enable verbose logging + logging_filename: Default log file name + config: Global SeBS configuration + """ + @property def cache_client(self) -> Cache: + """Get the cache client. + + Returns: + Cache client for managing cached artifacts + """ return self._cache_client @property def docker_client(self) -> docker.client: + """Get the Docker client. + + Returns: + Docker client for container operations + """ return self._docker_client @property def output_dir(self) -> str: + """Get the output directory. + + Returns: + Path to the output directory + """ return self._output_dir @property def verbose(self) -> bool: + """Get the verbose flag. + + Returns: + Whether verbose logging is enabled + """ return self._verbose @property def logging_filename(self) -> Optional[str]: + """Get the default logging filename. + + Returns: + Default logging filename or None if not set + """ return self._logging_filename @property def config(self) -> SeBSConfig: + """Get the global SeBS configuration. 
+ + Returns: + Global configuration object + """ return self._config def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: + """Generate logging handlers for a specific file. + + This method creates or retrieves cached logging handlers for a given filename. + If no filename is provided, the default logging filename is used. + + Args: + logging_filename: Optional filename for logs, defaults to self.logging_filename + + Returns: + LoggingHandlers configured for the specified file + """ filename = logging_filename if logging_filename else self.logging_filename if filename in self._handlers: return self._handlers[filename] @@ -60,6 +133,21 @@ def __init__( verbose: bool = False, logging_filename: Optional[str] = None, ): + """Initialize the SeBS client. + + Creates a new SeBS client with the specified configuration. This sets up: + - Docker client + - Cache client + - Global configuration + - Logging handlers + - Output directory + + Args: + cache_dir: Directory for caching artifacts + output_dir: Directory for storing output files and logs + verbose: Whether to enable verbose logging (default: False) + logging_filename: Default log file name (default: None) + """ super().__init__() self._docker_client = docker.from_env() self._cache_client = Cache(cache_dir, self._docker_client) @@ -70,12 +158,16 @@ def __init__( self._handlers: Dict[Optional[str], LoggingHandlers] = {} self.logging_handlers = self.generate_logging_handlers() + # Create output directory if it doesn't exist os.makedirs(self.output_dir, exist_ok=True) def ignore_cache(self): - """ - The cache will only store code packages, - and won't update new functions and storage. + """Configure the cache to only store code packages. + + After calling this method, the cache will only store code packages + and won't update or use cached functions and storage. This is useful + when you want to ensure that functions are redeployed and storage + is recreated, but still want to reuse code packages. """ self._cache_client.ignore_storage = True self._cache_client.ignore_functions = True @@ -86,10 +178,33 @@ def get_deployment( logging_filename: Optional[str] = None, deployment_config: Optional[Config] = None, ) -> FaaSSystem: + """Get a deployment client for a specific cloud platform. + + This method creates and returns a deployment client for the specified + cloud platform. It validates that the requested platform and configuration + are supported, and initializes the client with the appropriate resources. + + The method dynamically imports the necessary modules for each platform + based on what's available in the environment, determined by has_platform(). + + Args: + config: Configuration dictionary with deployment and experiment settings + logging_filename: Optional filename for logs + deployment_config: Optional pre-configured deployment config + + Returns: + An initialized FaaS system deployment client + + Raises: + RuntimeError: If the requested deployment is not supported or if the + configuration is invalid (unsupported architecture, + deployment type, etc.) 
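+
+        Example:
+            A minimal sketch; 'config' is a dictionary with 'deployment' and
+            'experiments' sections as described in the Args above:
+
+                with SeBS("cache", "output") as sebs_client:
+                    deployment = sebs_client.get_deployment(config)
+                    deployment.initialize()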
+ """ dep_config = config["deployment"] name = dep_config["name"] implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} + # Dynamically import platform-specific modules as needed if has_platform("aws"): from sebs.aws import AWS @@ -107,9 +222,11 @@ def get_deployment( implementations["openwhisk"] = OpenWhisk + # Validate deployment platform if name not in implementations: raise RuntimeError("Deployment {name} not supported!".format(name=name)) + # Validate architecture if config["experiments"]["architecture"] not in self._config.supported_architecture(name): raise RuntimeError( "{architecture} is not supported in {name}".format( @@ -117,21 +234,24 @@ def get_deployment( ) ) + # Validate deployment type - container if config["experiments"][ "container_deployment" ] and not self._config.supported_container_deployment(name): raise RuntimeError(f"Container deployment is not supported in {name}.") + # Validate deployment type - package if not config["experiments"][ "container_deployment" ] and not self._config.supported_package_deployment(name): raise RuntimeError(f"Code package deployment is not supported in {name}.") - # FIXME: future annotations, requires Python 3.7+ + # Set up logging and create deployment configuration handlers = self.generate_logging_handlers(logging_filename) if not deployment_config: deployment_config = Config.deserialize(dep_config, self.cache_client, handlers) + # Create and return the deployment client deployment_client = implementations[name]( self._config, deployment_config, # type: ignore @@ -146,15 +266,57 @@ def get_deployment_config( config: dict, logging_filename: Optional[str] = None, ) -> Config: + """Create a deployment configuration from a dictionary. + + This method deserializes a deployment configuration from a dictionary, + setting up logging handlers and connecting it to the cache client. + + Args: + config: Configuration dictionary + logging_filename: Optional filename for logs + + Returns: + A deserialized deployment configuration object + """ handlers = self.generate_logging_handlers(logging_filename) return Config.deserialize(config, self.cache_client, handlers) def get_experiment_config(self, config: dict) -> ExperimentConfig: + """Create an experiment configuration from a dictionary. + + This method deserializes an experiment configuration from a dictionary. + The experiment configuration contains settings specific to the + experiment being run, such as the number of iterations, timeout, etc. + + Args: + config: Configuration dictionary + + Returns: + A deserialized experiment configuration object + """ return ExperimentConfig.deserialize(config) def get_experiment( self, experiment_type: str, config: dict, logging_filename: Optional[str] = None ) -> Experiment: + """Get an experiment implementation for a specific experiment type. + + This method creates and returns an experiment implementation for the + specified experiment type. It validates that the requested experiment + type is supported and initializes the experiment with the appropriate + configuration. 
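+
+        Example:
+            A minimal sketch, where 'experiment_config' holds the experiment
+            settings dictionary:
+
+                experiment = sebs_client.get_experiment("perf-cost", experiment_config)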
+ + Args: + experiment_type: Type of experiment to create (e.g., "perf-cost") + config: Configuration dictionary + logging_filename: Optional filename for logs + + Returns: + An initialized experiment implementation + + Raises: + RuntimeError: If the requested experiment type is not supported + """ from sebs.experiments import ( Experiment, PerfCost, @@ -163,14 +325,19 @@ def get_experiment( EvictionModel, ) + # Map of supported experiment types to their implementations implementations: Dict[str, Type[Experiment]] = { "perf-cost": PerfCost, "network-ping-pong": NetworkPingPong, "invocation-overhead": InvocationOverhead, "eviction-model": EvictionModel, } + + # Validate experiment type if experiment_type not in implementations: raise RuntimeError(f"Experiment {experiment_type} not supported!") + + # Create and configure the experiment experiment = implementations[experiment_type](self.get_experiment_config(config)) experiment.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename @@ -184,6 +351,22 @@ def get_benchmark( config: ExperimentConfig, logging_filename: Optional[str] = None, ) -> Benchmark: + """Get a benchmark implementation for a specific benchmark. + + This method creates and returns a benchmark implementation for the + specified benchmark name. It configures the benchmark with the + appropriate deployment, configuration, and resources. + + Args: + name: Name of the benchmark to create (e.g., "210.thumbnailer") + deployment: FaaS system deployment client + config: Experiment configuration + logging_filename: Optional filename for logs + + Returns: + An initialized benchmark implementation + """ + # Create and configure the benchmark benchmark = Benchmark( name, deployment.name(), @@ -193,6 +376,8 @@ def get_benchmark( self.cache_client, self.docker_client, ) + + # Set up logging benchmark.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename ) @@ -200,37 +385,117 @@ def get_benchmark( @staticmethod def get_storage_implementation(storage_type: types.Storage) -> Type[PersistentStorage]: + """Get a storage implementation for a specific storage type. + + This method returns the class for a persistent storage implementation + for the specified storage type. + + Args: + storage_type: Type of storage to get implementation for + + Returns: + Storage implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.Storage.MINIO: minio.Minio} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"Storage type {storage_type} not supported" return impl @staticmethod def get_nosql_implementation(storage_type: types.NoSQLStorage) -> Type[NoSQLStorage]: + """Get a NoSQL storage implementation for a specific storage type. + + This method returns the class for a NoSQL storage implementation + for the specified storage type. + + Args: + storage_type: Type of NoSQL storage to get implementation for + + Returns: + NoSQL storage implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.NoSQLStorage.SCYLLADB: scylladb.ScyllaDB} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"NoSQL storage type {storage_type} not supported" return impl @staticmethod def get_storage_config_implementation(storage_type: types.Storage): + """Get a storage configuration implementation for a specific storage type. 
+ + This method returns the class for a storage configuration implementation + for the specified storage type. + + Args: + storage_type: Type of storage to get configuration for + + Returns: + Storage configuration implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.Storage.MINIO: config.MinioConfig} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"Storage configuration for type {storage_type} not supported" return impl @staticmethod def get_nosql_config_implementation(storage_type: types.NoSQLStorage): + """Get a NoSQL configuration implementation for a specific storage type. + + This method returns the class for a NoSQL configuration implementation + for the specified storage type. + + Args: + storage_type: Type of NoSQL storage to get configuration for + + Returns: + NoSQL configuration implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.NoSQLStorage.SCYLLADB: config.ScyllaDBConfig} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"NoSQL configuration for type {storage_type} not supported" return impl def shutdown(self): + """Shutdown the SeBS client and release resources. + + This method shuts down the cache client and releases any resources + that need to be cleaned up when the client is no longer needed. + It is automatically called when using the client as a context manager. + """ self.cache_client.shutdown() def __enter__(self): + """Enter context manager. + + This method allows the SeBS client to be used as a context manager + using the 'with' statement, which ensures proper cleanup of resources. + + Returns: + The SeBS client instance + """ return self - def __exit__(self): + def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): + """Exit context manager. + + This method is called when exiting a 'with' block. It ensures that + resources are properly cleaned up by calling shutdown(). + + Args: + exc_type: Exception type if an exception occurred, None otherwise + exc_val: Exception value if an exception occurred, None otherwise + exc_tb: Exception traceback if an exception occurred, None otherwise + """ self.shutdown() diff --git a/sebs/statistics.py b/sebs/statistics.py index 8d00b855..9189a787 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -1,3 +1,11 @@ +"""Statistical analysis utilities for benchmark experiments. + +This module provides functions for computing basic statistics and confidence +intervals on benchmark experiment results. It includes both parametric +(Student's t-distribution) and non-parametric (Le Boudec) methods for +computing confidence intervals. +""" + import math from typing import List, Tuple from collections import namedtuple @@ -5,31 +13,79 @@ import numpy as np import scipy.stats as st +# Named tuple for basic statistics results BasicStats = namedtuple("BasicStats", "mean median std cv") def basic_stats(times: List[float]) -> BasicStats: + """Compute basic statistics for a list of measurement times. + + This function computes the mean, median, standard deviation, and + coefficient of variation for a list of measurement times. 
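+
+    Example:
+        Illustrative values; 'cv' is the standard deviation relative to the
+        mean, expressed as a percentage:
+
+            stats = basic_stats([10.2, 11.5, 10.8])
+            print(stats.mean, stats.median, stats.std, stats.cv)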
+ + Args: + times: List of measurement times + + Returns: + A BasicStats named tuple with the computed statistics + """ mean = np.mean(times) median = np.median(times) std = np.std(times) - cv = std / mean * 100 + cv = std / mean * 100 # Coefficient of variation as percentage return BasicStats(mean, median, std, cv) def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: + """Compute parametric confidence interval using Student's t-distribution. + + This function computes a confidence interval for the mean of the given + measurement times using Student's t-distribution. This is a parametric + method that assumes the data follows a normal distribution. + + Args: + alpha: Confidence level (e.g., 0.95 for 95% confidence) + times: List of measurement times + + Returns: + A tuple (lower, upper) representing the confidence interval + """ mean = np.mean(times) return st.t.interval(alpha, len(times) - 1, loc=mean, scale=st.sem(times)) def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: - + """Compute non-parametric confidence interval using Le Boudec's method. + + This function computes a confidence interval for the median of the given + measurement times using the method described by Le Boudec. This is a + non-parametric method that does not assume any particular distribution + of the data. + + Reference: + J.-Y. Le Boudec, "Methods for the Estimation of the Accuracy of + Measurements in Computer Performance Evaluation", + Performance Evaluation Review, 2010 + + Args: + alpha: Confidence level (e.g., 0.95 for 95% confidence) + times: List of measurement times + + Returns: + A tuple (lower, upper) representing the confidence interval + + Raises: + AssertionError: If an unsupported confidence level is provided + """ sorted_times = sorted(times) n = len(times) - # z(alfa/2) + # Z-values for common confidence levels + # z(alpha/2) for two-sided interval z_value = {0.95: 1.96, 0.99: 2.576}.get(alpha) - assert z_value + assert z_value, f"Unsupported confidence level: {alpha}" + # Calculate positions in the sorted array low_pos = math.floor((n - z_value * math.sqrt(n)) / 2) high_pos = math.ceil(1 + (n + z_value * math.sqrt(n)) / 2) diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index bb9112a2..db9b1f9e 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -1,3 +1,12 @@ +""" +Module for MinIO S3-compatible storage in the Serverless Benchmarking Suite. + +This module implements local object storage using MinIO, which provides an +S3-compatible API. MinIO runs in a Docker container and provides persistent +storage for benchmark data and results. It is primarily used for local +testing and development of S3-dependent serverless functions. +""" + import copy import json import os @@ -17,15 +26,39 @@ class Minio(PersistentStorage): + """ + S3-compatible object storage implementation using MinIO. + + This class manages a MinIO storage instance running in a Docker container, + providing S3-compatible object storage for local benchmarking. It handles + bucket creation, file uploads/downloads, and container lifecycle management. + + Attributes: + config: MinIO configuration settings + connection: MinIO client connection + """ + @staticmethod def typename() -> str: + """ + Get the qualified type name of this class. + + Returns: + str: Full type name including deployment name + """ return f"{Minio.deployment_name()}.Minio" @staticmethod def deployment_name() -> str: + """ + Get the deployment platform name. 
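# Minimal sketch of the non-parametric (Le Boudec) confidence interval
# described above: pick order statistics around the median using a normal
# approximation. The return statement of the original function is not shown in
# this hunk, so returning the values at the two computed positions (clamped to
# valid indices) is an assumption.
import math


def _ci_le_boudec_sketch(alpha: float, times):
    sorted_times = sorted(times)
    n = len(sorted_times)
    z_value = {0.95: 1.96, 0.99: 2.576}.get(alpha)
    assert z_value, f"Unsupported confidence level: {alpha}"
    low_pos = math.floor((n - z_value * math.sqrt(n)) / 2)
    high_pos = math.ceil(1 + (n + z_value * math.sqrt(n)) / 2)
    low_pos = max(low_pos, 0)
    high_pos = min(high_pos, n - 1)
    return sorted_times[low_pos], sorted_times[high_pos]


# Example: for 100 samples at alpha=0.95 this selects roughly the 40th and
# 61st order statistics as the interval around the median.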
+ + Returns: + str: Deployment name ('minio') + """ return "minio" - # the location does not matter + # The region setting is required by S3 API but not used for local MinIO MINIO_REGION = "us-east-1" def __init__( @@ -35,6 +68,15 @@ def __init__( resources: Resources, replace_existing: bool, ): + """ + Initialize a MinIO storage instance. + + Args: + docker_client: Docker client for managing the MinIO container + cache_client: Cache client for storing storage configuration + resources: Resources configuration + replace_existing: Whether to replace existing buckets + """ super().__init__(self.MINIO_REGION, cache_client, resources, replace_existing) self._docker_client = docker_client self._storage_container: Optional[docker.container] = None @@ -42,17 +84,35 @@ def __init__( @property def config(self) -> MinioConfig: + """ + Get the MinIO configuration. + + Returns: + MinioConfig: The configuration object + """ return self._cfg @config.setter def config(self, config: MinioConfig): + """ + Set the MinIO configuration. + + Args: + config: New configuration object + """ self._cfg = config @staticmethod def _define_http_client(): """ - Minio does not allow another way of configuring timeout for connection. - The rest of configuration is copied from source code of Minio. + Configure HTTP client for MinIO with appropriate timeouts and retries. + + MinIO does not provide a direct way to configure connection timeouts, so + we need to create a custom HTTP client with proper timeout settings. + The rest of configuration follows MinIO's default client settings. + + Returns: + urllib3.PoolManager: Configured HTTP client for MinIO """ import urllib3 from datetime import timedelta @@ -68,13 +128,25 @@ def _define_http_client(): ) def start(self): - + """ + Start a MinIO storage container. + + Creates and runs a Docker container with MinIO, configuring it with + random credentials and mounting a volume for persistent storage. + The container runs in detached mode and is accessible via the + configured port. + + Raises: + RuntimeError: If starting the MinIO container fails + """ + # Set up data volume location if self._cfg.data_volume == "": minio_volume = os.path.join(project_absolute_path(), "minio-volume") else: minio_volume = self._cfg.data_volume minio_volume = os.path.abspath(minio_volume) + # Create volume directory if it doesn't exist os.makedirs(minio_volume, exist_ok=True) volumes = { minio_volume: { @@ -83,13 +155,16 @@ def start(self): } } + # Generate random credentials for security self._cfg.access_key = secrets.token_urlsafe(32) self._cfg.secret_key = secrets.token_hex(32) self._cfg.address = "" self.logging.info("Minio storage ACCESS_KEY={}".format(self._cfg.access_key)) self.logging.info("Minio storage SECRET_KEY={}".format(self._cfg.secret_key)) + try: self.logging.info(f"Starting storage Minio on port {self._cfg.mapped_port}") + # Run the MinIO container self._storage_container = self._docker_client.containers.run( f"minio/minio:{self._cfg.version}", command="server /data", @@ -115,27 +190,41 @@ def start(self): raise RuntimeError("Starting Minio storage unsuccesful") def configure_connection(self): - # who knows why? otherwise attributes are not loaded + """ + Configure the connection to the MinIO container. + + Determines the appropriate address to connect to the MinIO container + based on the host platform. For Linux, it uses the container's IP address, + while for Windows, macOS, or WSL it uses localhost with the mapped port. 
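# Hedged sketch of the custom HTTP client described in _define_http_client:
# urllib3 allows explicit connect/read timeouts and a retry policy, which the
# MinIO Python client accepts via its `http_client` argument. The concrete
# timeout and retry values below are illustrative, not the ones used by SeBS.
import urllib3


def _make_http_client() -> urllib3.PoolManager:
    return urllib3.PoolManager(
        timeout=urllib3.util.Timeout(connect=10.0, read=60.0),
        retries=urllib3.util.Retry(total=3, backoff_factor=0.5),
    )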
+ + Raises: + RuntimeError: If the MinIO container is not available or if the IP address + cannot be detected + """ + # Only configure if the address is not already set if self._cfg.address == "": - + # Verify container existence if self._storage_container is None: raise RuntimeError( "Minio container is not available! Make sure that you deployed " "the Minio storage and provided configuration!" ) + # Reload to ensure we have the latest container attributes self._storage_container.reload() - # Check if the system is Linux and that it's not WSL + # Platform-specific address configuration if is_linux(): + # On native Linux, use the container's bridge network IP networks = self._storage_container.attrs["NetworkSettings"]["Networks"] self._cfg.address = "{IPAddress}:{Port}".format( IPAddress=networks["bridge"]["IPAddress"], Port=9000 ) else: - # System is either WSL, Windows, or Mac + # On Windows, macOS, or WSL, use localhost with the mapped port self._cfg.address = f"localhost:{self._cfg.mapped_port}" + # Verify address was successfully determined if not self._cfg.address: self.logging.error( f"Couldn't read the IP address of container from attributes " @@ -145,37 +234,74 @@ def configure_connection(self): f"Incorrect detection of IP address for container with id {self._instance_id}" ) self.logging.info("Starting minio instance at {}".format(self._cfg.address)) + + # Create the connection using the configured address self.connection = self.get_connection() def stop(self): + """ + Stop the MinIO container. + + Gracefully stops the running MinIO container if it exists. + Logs an error if the container is not known. + """ if self._storage_container is not None: self.logging.info(f"Stopping minio container at {self._cfg.address}.") self._storage_container.stop() self.logging.info(f"Stopped minio container at {self._cfg.address}.") else: - self.logging.error("Stopping minio was not succesful, storage container not known!") + self.logging.error("Stopping minio was not successful, storage container not known!") def get_connection(self): + """ + Create a new MinIO client connection. + + Creates a connection to the MinIO server using the configured address, + credentials, and HTTP client settings. + + Returns: + minio.Minio: Configured MinIO client + """ return minio.Minio( self._cfg.address, access_key=self._cfg.access_key, secret_key=self._cfg.secret_key, - secure=False, + secure=False, # Local MinIO doesn't use HTTPS http_client=Minio._define_http_client(), ) def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False): + """ + Create a new bucket if it doesn't already exist. + + Checks if a bucket with the given name already exists in the list of buckets. + If not, creates a new bucket with either the exact name or a randomized name. 
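# Hedged usage sketch of get_connection(): the `minio` Python client takes a
# host:port endpoint plus the generated credentials, with TLS disabled for the
# local container. The endpoint and credentials below are placeholders.
import minio

_client = minio.Minio(
    "localhost:9000",              # address chosen in configure_connection()
    access_key="example-access-key",
    secret_key="example-secret-key",
    secure=False,                  # local MinIO is served over plain HTTP
)
# e.g. _client.bucket_exists("sebs-benchmarks") checks for a benchmark bucket.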
+ + Args: + name: Base name for the bucket + buckets: List of existing bucket names to check against + randomize_name: Whether to append a random UUID to the bucket name + + Returns: + str: Name of the existing or newly created bucket + + Raises: + minio.error.ResponseError: If bucket creation fails + """ + # Check if bucket already exists for bucket_name in buckets: if name in bucket_name: self.logging.info( "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name - # minio has limit of bucket name to 16 characters + + # MinIO has limit of bucket name to 16 characters if randomize_name: bucket_name = "{}-{}".format(name, str(uuid.uuid4())[0:16]) else: bucket_name = name + try: self.connection.make_bucket(bucket_name, location=self.MINIO_REGION) self.logging.info("Created bucket {}".format(bucket_name)) @@ -186,19 +312,39 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo minio.error.ResponseError, ) as err: self.logging.error("Bucket creation failed!") - # rethrow + # Rethrow the error for handling by the caller raise err def uploader_func(self, path_idx, file, filepath): + """ + Upload a file to the MinIO storage. + + Uploads a file to the specified input prefix in the benchmarks bucket. + This function is passed to benchmarks for uploading their input data. + + Args: + path_idx: Index of the input prefix to use + file: Name of the file within the bucket + filepath: Local path to the file to upload + + Raises: + minio.error.ResponseError: If the upload fails + """ try: key = os.path.join(self.input_prefixes[path_idx], file) bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) self.connection.fput_object(bucket_name, key, filepath) except minio.error.ResponseError as err: self.logging.error("Upload failed!") - raise (err) + raise err def clean(self): + """ + Clean all objects from output buckets. + + Removes all objects from the output buckets to prepare for a new + benchmark run. Logs any errors that occur during deletion. + """ for bucket in self.output_buckets: objects = self.connection.list_objects_v2(bucket) objects = [obj.object_name for obj in objects] @@ -206,6 +352,15 @@ def clean(self): self.logging.error("Deletion Error: {}".format(err)) def download_results(self, result_dir): + """ + Download all objects from output buckets to a local directory. + + Downloads benchmark results from all output buckets to a subdirectory + named 'storage_output' within the specified result directory. + + Args: + result_dir: Base directory to store downloaded results + """ result_dir = os.path.join(result_dir, "storage_output") for bucket in self.output_buckets: objects = self.connection.list_objects_v2(bucket) @@ -214,6 +369,15 @@ def download_results(self, result_dir): self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj)) def clean_bucket(self, bucket: str): + """ + Remove all objects from a bucket. + + Deletes all objects within the specified bucket but keeps the bucket itself. + Logs any errors that occur during object deletion. + + Args: + bucket: Name of the bucket to clean + """ delete_object_list = map( lambda x: minio.DeleteObject(x.object_name), self.connection.list_objects(bucket_name=bucket), @@ -223,18 +387,69 @@ def clean_bucket(self, bucket: str): self.logging.error(f"Error when deleting object from bucket {bucket}: {error}!") def remove_bucket(self, bucket: str): + """ + Delete a bucket completely. + + Removes the specified bucket from the MinIO storage. 
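# Minimal sketch of the bucket-naming logic in _create_bucket: reuse a bucket
# whose name already contains the base name, otherwise append a shortened UUID.
# The 16-character suffix mirrors the comment about MinIO's name-length limit.
import uuid
from typing import List


def _pick_bucket_name(name: str, existing: List[str], randomize_name: bool) -> str:
    for bucket_name in existing:
        if name in bucket_name:
            return bucket_name          # bucket already provisioned, reuse it
    if randomize_name:
        return "{}-{}".format(name, str(uuid.uuid4())[0:16])
    return name


# Example: _pick_bucket_name("benchmarks", [], True) -> "benchmarks-<uuid prefix>".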
+ The bucket must be empty before it can be deleted. + + Args: + bucket: Name of the bucket to remove + """ self.connection.remove_bucket(Bucket=bucket) def correct_name(self, name: str) -> str: + """ + Format a bucket name to comply with MinIO naming requirements. + + For MinIO, no name correction is needed (unlike some cloud providers + that enforce additional restrictions). + + Args: + name: Original bucket name + + Returns: + str: Bucket name (unchanged for MinIO) + """ return name def download(self, bucket_name: str, key: str, filepath: str): + """ + Download an object from a bucket to a local file. + + Not implemented for this class. Use fget_object directly or other methods. + + Raises: + NotImplementedError: This method is not implemented + """ raise NotImplementedError() def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a bucket exists. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if the bucket exists, False otherwise + """ return self.connection.bucket_exists(bucket_name) def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List all objects in a bucket with an optional prefix filter. + + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of object names in the bucket + + Raises: + RuntimeError: If the bucket does not exist + """ try: objects_list = self.connection.list_objects(bucket_name) return [obj.object_name for obj in objects_list if prefix in obj.object_name] @@ -242,6 +457,15 @@ def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: raise RuntimeError(f"Attempting to access a non-existing bucket {bucket_name}!") def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all buckets, optionally filtered by name. + + Args: + bucket_name: Optional filter for bucket names + + Returns: + List[str]: List of bucket names + """ buckets = self.connection.list_buckets() if bucket_name is not None: return [bucket.name for bucket in buckets if bucket_name in bucket.name] @@ -249,17 +473,32 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: return [bucket.name for bucket in buckets] def upload(self, bucket_name: str, filepath: str, key: str): + """ + Upload a file to a bucket. + + Not implemented for this class. Use fput_object directly or uploader_func. + + Raises: + NotImplementedError: This method is not implemented + """ raise NotImplementedError() def serialize(self) -> dict: + """ + Serialize MinIO configuration to a dictionary. + + Returns: + dict: Serialized configuration data + """ return self._cfg.serialize() """ - This implementation supports overriding this class. - The main Minio class is used to start/stop deployments. - - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. + Deserialization and inheritance support + + This implementation supports overriding this class. The main Minio class + is used to start/stop deployments. When overriding the implementation in + Local/OpenWhisk/..., we call the _deserialize method and provide an + alternative implementation type. """ T = TypeVar("T", bound="Minio") @@ -271,9 +510,30 @@ def _deserialize( resources: Resources, obj_type: Type[T], ) -> T: + """ + Deserialize a MinIO instance from cached configuration with custom type. + + Creates a new instance of the specified class type from cached configuration + data. 
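# Sketch of the TypeVar-based deserialization pattern used below: a single
# helper builds an instance of whichever subclass is requested, so
# platform-specific storage classes can reuse the shared logic. The classes
# here are stand-ins for the real Minio hierarchy.
from typing import Type, TypeVar


class _Storage:
    def __init__(self, address: str):
        self.address = address


class _LocalStorage(_Storage):
    pass


_T = TypeVar("_T", bound=_Storage)


def _deserialize_as(cached_address: str, obj_type: Type[_T]) -> _T:
    return obj_type(cached_address)     # instantiate the requested subclass


# Example: _deserialize_as("localhost:9000", _LocalStorage) is a _LocalStorage.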
This allows platform-specific versions to be deserialized correctly + while sharing the core implementation. + + Args: + cached_config: Cached MinIO configuration + cache_client: Cache client + resources: Resources configuration + obj_type: Type of object to create (a Minio subclass) + + Returns: + T: Deserialized instance of the specified type + + Raises: + RuntimeError: If the storage container does not exist + """ docker_client = docker.from_env() obj = obj_type(docker_client, cache_client, resources, False) obj._cfg = cached_config + + # Try to reconnect to existing container if ID is available if cached_config.instance_id: instance_id = cached_config.instance_id try: @@ -282,11 +542,28 @@ def _deserialize( raise RuntimeError(f"Storage container {instance_id} does not exist!") else: obj._storage_container = None + + # Copy bucket information obj._input_prefixes = copy.copy(cached_config.input_buckets) obj._output_prefixes = copy.copy(cached_config.output_buckets) + + # Set up connection obj.configure_connection() return obj @staticmethod def deserialize(cached_config: MinioConfig, cache_client: Cache, res: Resources) -> "Minio": + """ + Deserialize a MinIO instance from cached configuration. + + Creates a new Minio instance from cached configuration data. + + Args: + cached_config: Cached MinIO configuration + cache_client: Cache client + res: Resources configuration + + Returns: + Minio: Deserialized Minio instance + """ return Minio._deserialize(cached_config, cache_client, res, Minio) diff --git a/sebs/types.py b/sebs/types.py index b87516fb..988efbd0 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -1,12 +1,38 @@ +"""Type definitions for the Serverless Benchmarking Suite. + +This module provides enum types used throughout the benchmarking suite +to represent different platforms, storage types, and benchmark modules. +These types are used for configuration, deployment, and resource management. +""" + from enum import Enum class BenchmarkModule(str, Enum): + """Types of benchmark modules. + + This enum defines the different types of benchmark modules that can + be used by benchmark functions: + + - STORAGE: Object storage module for storing and retrieving files + - NOSQL: NoSQL database module for storing and retrieving structured data + """ STORAGE = "storage" NOSQL = "nosql" class Platforms(str, Enum): + """Supported serverless platforms. + + This enum defines the different serverless platforms supported by + the benchmarking suite: + + - AWS: Amazon Web Services Lambda + - AZURE: Microsoft Azure Functions + - GCP: Google Cloud Platform Cloud Functions + - LOCAL: Local execution environment + - OPENWHISK: Apache OpenWhisk + """ AWS = "aws" AZURE = "azure" GCP = "gcp" @@ -15,6 +41,16 @@ class Platforms(str, Enum): class Storage(str, Enum): + """Supported object storage services. + + This enum defines the different object storage services supported + by the benchmarking suite: + + - AWS_S3: Amazon Simple Storage Service (S3) + - AZURE_BLOB_STORAGE: Microsoft Azure Blob Storage + - GCP_STORAGE: Google Cloud Storage + - MINIO: MinIO object storage (local or self-hosted) + """ AWS_S3 = "aws-s3" AZURE_BLOB_STORAGE = "azure-blob-storage" GCP_STORAGE = "google-cloud-storage" @@ -22,6 +58,16 @@ class Storage(str, Enum): class NoSQLStorage(str, Enum): + """Supported NoSQL database services. 
+ + This enum defines the different NoSQL database services supported + by the benchmarking suite: + + - AWS_DYNAMODB: Amazon DynamoDB + - AZURE_COSMOSDB: Microsoft Azure Cosmos DB + - GCP_DATASTORE: Google Cloud Datastore + - SCYLLADB: ScyllaDB (compatible with Apache Cassandra) + """ AWS_DYNAMODB = "aws-dynamodb" AZURE_COSMOSDB = "azure-cosmosdb" GCP_DATASTORE = "google-cloud-datastore" diff --git a/sebs/utils.py b/sebs/utils.py index e7ab43f6..c232be43 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -1,3 +1,14 @@ +""" +Utility functions and classes for the Serverless Benchmarking Suite (SeBs). + +This module provides common utilities used throughout the framework, including: +- File system operations and path management +- Process execution and command handling +- JSON serialization and data manipulation +- Logging configuration and utilities +- Platform detection functions +""" + import json import logging import os @@ -10,17 +21,45 @@ from typing import List, Optional +# Global constants PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir) DOCKER_DIR = os.path.join(PROJECT_DIR, "dockerfiles") PACK_CODE_APP = "pack_code_{}.sh" -def project_absolute_path(*paths: str): +def project_absolute_path(*paths: str) -> str: + """ + Join paths relative to the project root directory. + + Args: + *paths: Path components to join + + Returns: + str: Absolute path including the project directory + """ return os.path.join(PROJECT_DIR, *paths) class JSONSerializer(json.JSONEncoder): + """ + Custom JSON encoder for objects with serialize method. + + This encoder handles objects by: + 1. Using their serialize() method if available + 2. Converting dictionaries to strings + 3. Using vars() to get object attributes + 4. Falling back to string representation + """ def default(self, o): + """ + Custom serialization for objects. + + Args: + o: Object to serialize + + Returns: + JSON serializable representation of the object + """ if hasattr(o, "serialize"): return o.serialize() elif isinstance(o, dict): @@ -33,14 +72,36 @@ def default(self, o): def serialize(obj) -> str: + """ + Serialize an object to a JSON string. + + Args: + obj: Object to serialize + + Returns: + str: JSON string representation of the object + """ if hasattr(obj, "serialize"): return json.dumps(obj.serialize(), sort_keys=True, indent=2) else: return json.dumps(obj, cls=JSONSerializer, sort_keys=True, indent=2) -# Executing with shell provides options such as wildcard expansion -def execute(cmd, shell=False, cwd=None): +def execute(cmd, shell=False, cwd=None) -> str: + """ + Execute a shell command and capture its output, handling errors. + + Args: + cmd: Command to execute (string) + shell: Whether to use shell execution (enables wildcards, pipes, etc.) + cwd: Working directory for command execution + + Returns: + str: Command output as string + + Raises: + RuntimeError: If command execution fails + """ if not shell: cmd = cmd.split() ret = subprocess.run( @@ -53,7 +114,15 @@ def execute(cmd, shell=False, cwd=None): return ret.stdout.decode("utf-8") -def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]): +def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]) -> None: + """ + Update a nested dictionary with a value at the specified key path. 
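# Simplified sketch of the JSONSerializer fallback chain described above:
# prefer an object's serialize() method, then fall back to vars(), then to
# str(). This is a standalone illustration, not the SeBS class itself.
import json


class _Encoder(json.JSONEncoder):
    def default(self, o):
        if hasattr(o, "serialize"):
            return o.serialize()
        try:
            return vars(o)
        except TypeError:
            return str(o)


class _Config:
    def serialize(self) -> dict:
        return {"region": "us-east-1"}


# Example: json.dumps(_Config(), cls=_Encoder) -> '{"region": "us-east-1"}'.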
+ + Args: + cfg: Dictionary to update + keys: List of keys forming a path to the value + value: Value to set (skipped if None) + """ if value is not None: # make sure parent keys exist for key in keys[:-1]: @@ -61,7 +130,15 @@ def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]): cfg[keys[-1]] = value -def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]): +def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]) -> None: + """ + Append a dictionary to a nested location in another dictionary. + + Args: + cfg: Dictionary to update + keys: List of keys forming a path to the value + value: Dictionary to append (skipped if None or empty) + """ if value: # make sure parent keys exist for key in keys[:-1]: @@ -69,14 +146,35 @@ def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]): cfg[keys[-1]] = {**cfg[keys[-1]], **value} -def find(name, path): +def find(name: str, path: str) -> Optional[str]: + """ + Find a directory with the given name in the specified path. + + Args: + name: Directory name to find + path: Path to search in + + Returns: + str: Path to the found directory, or None if not found + """ for root, dirs, files in os.walk(path): if name in dirs: return os.path.join(root, name) return None -def create_output(directory, preserve_dir, verbose): +def create_output(directory: str, preserve_dir: bool, verbose: bool) -> str: + """ + Create or clean an output directory for benchmark results. + + Args: + directory: Path to create + preserve_dir: Whether to preserve existing directory + verbose: Verbosity level for logging + + Returns: + str: Absolute path to the output directory + """ output_dir = os.path.abspath(directory) if os.path.exists(output_dir) and not preserve_dir: shutil.rmtree(output_dir) @@ -87,8 +185,13 @@ def create_output(directory, preserve_dir, verbose): return output_dir -def configure_logging(): - +def configure_logging() -> None: + """ + Configure global logging settings. + + Reduces noise from third-party libraries by setting their log levels to ERROR. + This ensures that only important messages from these libraries are shown. 
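# Sketch of how update_nested_dict walks a key path, creating intermediate
# dictionaries on demand before assigning the leaf value. The configuration
# layout used in the example is arbitrary.
from typing import List, Optional


def _update_nested(cfg: dict, keys: List[str], value: Optional[str]) -> None:
    if value is None:
        return                          # None values are skipped, as documented
    node = cfg
    for key in keys[:-1]:
        node = node.setdefault(key, {})  # create parent keys if missing
    node[keys[-1]] = value


_cfg: dict = {}
_update_nested(_cfg, ["aws", "region"], "us-east-1")
# _cfg is now {"aws": {"region": "us-east-1"}}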
+ """ # disable information from libraries logging to decrease output noise loggers = ["urrlib3", "docker", "botocore"] for name in logging.root.manager.loggerDict: @@ -97,63 +200,53 @@ def configure_logging(): logging.getLogger(name).setLevel(logging.ERROR) -# def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): -# logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" -# logging_date_format = "%H:%M:%S" -# -# # default file log -# options = { -# "format": logging_format, -# "datefmt": logging_date_format, -# "level": logging.DEBUG if verbose else logging.INFO, -# } -# if output_dir: -# options = { -# **options, -# "filename": os.path.join(output_dir, "out.log"), -# "filemode": "w", -# } -# logging.basicConfig(**options) -# # Add stdout output -# if output_dir: -# stdout = logging.StreamHandler(sys.stdout) -# formatter = logging.Formatter(logging_format, logging_date_format) -# stdout.setFormatter(formatter) -# stdout.setLevel(logging.DEBUG if verbose else logging.INFO) -# logging.getLogger().addHandler(stdout) -# # disable information from libraries logging to decrease output noise -# for name in logging.root.manager.loggerDict: -# if ( -# name.startswith("urllib3") -# or name.startswith("docker") -# or name.startswith("botocore") -# ): -# logging.getLogger(name).setLevel(logging.ERROR) - - -""" - Locate directory corresponding to a benchmark in benchmarks - or benchmarks-data directory. - - :param benchmark: Benchmark name. - :param path: Path for lookup, relative to repository. - :return: relative path to directory corresponding to benchmark -""" - - -def find_benchmark(benchmark: str, path: str): +def find_benchmark(benchmark: str, path: str) -> Optional[str]: + """ + Locate directory corresponding to a benchmark in the repository. + + Searches for a benchmark directory in either the benchmarks or + benchmarks-data directories. + + Args: + benchmark: Benchmark name + path: Path for lookup, relative to repository (usually 'benchmarks' or 'benchmarks-data') + + Returns: + str: Path to benchmark directory, or None if not found + """ benchmarks_dir = os.path.join(PROJECT_DIR, path) benchmark_path = find(benchmark, benchmarks_dir) return benchmark_path -def global_logging(): +def global_logging() -> None: + """ + Set up basic global logging configuration. + + Configures the root logger with a standard format, timestamp, and INFO level. + This provides a baseline for all logging in the application. + """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" logging.basicConfig(format=logging_format, datefmt=logging_date_format, level=logging.INFO) class ColoredWrapper: + """ + Wrapper for logging with colored console output. + + This class provides formatted, colorized logging output for better readability + in terminal environments. It optionally propagates messages to the standard + Python logger. + + Attributes: + SUCCESS: Green color code for success messages + STATUS: Blue color code for status/info messages + WARNING: Yellow color code for warnings + ERROR: Red color code for errors + BOLD: Bold text formatting code + END: Code to reset text formatting + """ SUCCESS = "\033[92m" STATUS = "\033[94m" WARNING = "\033[93m" @@ -162,38 +255,84 @@ class ColoredWrapper: END = "\033[0m" def __init__(self, prefix, logger, verbose=True, propagte=False): + """ + Initialize the colored logging wrapper. 
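# Sketch of the noise-reduction pattern in configure_logging(): third-party
# loggers (urllib3, docker, botocore) are raised to ERROR so only their
# important messages reach the output. Logger names are matched by prefix.
import logging


def _silence_noisy_loggers(prefixes=("urllib3", "docker", "botocore")) -> None:
    for name in logging.root.manager.loggerDict:
        if name.startswith(prefixes):
            logging.getLogger(name).setLevel(logging.ERROR)


# Example: call _silence_noisy_loggers() once after the libraries are imported,
# so their loggers are already registered in logging.root.manager.loggerDict.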
+ + Args: + prefix: Prefix for log messages (usually class name) + logger: Python logger to propagate to + verbose: Whether to show debug messages + propagte: Whether to propagate messages to the Python logger + """ self.verbose = verbose self.propagte = propagte self.prefix = prefix self._logging = logger def debug(self, message): + """ + Log a debug message. + + Args: + message: The message to log + """ if self.verbose: self._print(message, ColoredWrapper.STATUS) if self.propagte: self._logging.debug(message) def info(self, message): + """ + Log an informational message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.SUCCESS) if self.propagte: self._logging.info(message) def warning(self, message): + """ + Log a warning message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.WARNING) if self.propagte: self._logging.warning(message) def error(self, message): + """ + Log an error message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.ERROR) if self.propagte: self._logging.error(message) def critical(self, message): + """ + Log a critical error message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.ERROR) if self.propagte: self._logging.critical(message) def _print(self, message, color): + """ + Print a formatted message to the console. + + Args: + message: The message to print + color: ANSI color code to use + """ timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f") click.echo( f"{color}{ColoredWrapper.BOLD}[{timestamp}]{ColoredWrapper.END} " @@ -202,7 +341,25 @@ def _print(self, message, color): class LoggingHandlers: + """ + Configures and manages logging handlers. + + This class sets up handlers for logging to files and tracks verbosity settings + for use with ColoredWrapper. + + Attributes: + handler: FileHandler for logging to a file + verbosity: Whether to include debug-level messages + """ + def __init__(self, verbose: bool = False, filename: Optional[str] = None): + """ + Initialize logging handlers. + + Args: + verbose: Whether to include debug-level messages + filename: Optional file to log to + """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" formatter = logging.Formatter(logging_format, logging_date_format) @@ -220,7 +377,25 @@ def __init__(self, verbose: bool = False, filename: Optional[str] = None): class LoggingBase: + """ + Base class providing consistent logging functionality across the framework. + + This class sets up a logger with a unique identifier and provides methods + for logging at different levels with consistent formatting. It supports + both console output with color coding and optional file logging. + + Attributes: + log_name: Unique identifier for this logger + logging: ColoredWrapper for formatted console output + """ + def __init__(self): + """ + Initialize the logging base with a unique identifier. + + Creates a unique name for the logger based on class name and a random ID, + then configures a standard logger and colored wrapper. + """ uuid_name = str(uuid.uuid4())[0:4] if hasattr(self, "typename"): self.log_name = f"{self.typename()}-{uuid_name}" @@ -233,16 +408,34 @@ def __init__(self): @property def logging(self) -> ColoredWrapper: + """ + Get the colored logging wrapper. + + Returns: + ColoredWrapper: The logging wrapper for this instance + """ # This would always print log with color. 
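# Hedged sketch of the colored console output produced by ColoredWrapper._print:
# an ANSI color code, a bold timestamp, and the message, emitted via click.echo.
# The color values mirror the class attributes above; the bold code and the
# prefix are illustrative assumptions.
import datetime

import click

_GREEN, _BOLD, _END = "\033[92m", "\033[1m", "\033[0m"


def _print_colored(prefix: str, message: str, color: str = _GREEN) -> None:
    timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")
    click.echo(f"{color}{_BOLD}[{timestamp}]{_END} {color}{prefix}{_END} {message}")


# Example: _print_colored("Minio-ab12", "Created bucket benchmarks-1234")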
And only if # filename in LoggingHandlers is set, it would log to file. return self.wrapper @property def logging_handlers(self) -> LoggingHandlers: + """ + Get the logging handlers configuration. + + Returns: + LoggingHandlers: The current handlers configuration + """ return self._logging_handlers @logging_handlers.setter def logging_handlers(self, handlers: LoggingHandlers): + """ + Set new logging handlers configuration. + + Args: + handlers: The new handlers configuration to use + """ self._logging_handlers = handlers self._logging.propagate = False @@ -258,21 +451,50 @@ def logging_handlers(self, handlers: LoggingHandlers): def has_platform(name: str) -> bool: + """ + Check if a specific platform is enabled via environment variable. + + Looks for SEBS_WITH_{name} environment variable set to 'true'. + + Args: + name: Platform name to check + + Returns: + bool: True if platform is enabled, False otherwise + """ return os.environ.get(f"SEBS_WITH_{name.upper()}", "False").lower() == "true" -# Check if the system is Linux and that it's not WSL def is_linux() -> bool: + """ + Check if the system is Linux and not Windows Subsystem for Linux. + + Returns: + bool: True if native Linux, False otherwise + """ return platform.system() == "Linux" and "microsoft" not in platform.release().lower() -def catch_interrupt(): - +def catch_interrupt() -> None: + """ + Set up a signal handler to catch interrupt signals (Ctrl+C). + + Prints a stack trace and exits when an interrupt is received. + This helps with debugging by showing the execution context at + the time of the interruption. + """ import signal import sys import traceback def handler(x, y): + """ + Handle interrupt signal by printing stack trace and exiting. + + Args: + x: Signal number + y: Frame object + """ traceback.print_stack() sys.exit(signal.SIGINT) From 75855cbcb64a79f8f8503225ecb19fe3ad636f59 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 18 Jun 2025 23:36:56 +0200 Subject: [PATCH 06/21] [system] Next batch of docstrings --- sebs/aws/__init__.py | 23 ++ sebs/aws/aws.py | 197 ++++++++-- sebs/aws/config.py | 365 ++++++++++++++++- sebs/aws/container.py | 91 ++++- sebs/aws/dynamodb.py | 143 ++++++- sebs/aws/resources.py | 84 +++- sebs/aws/s3.py | 158 +++++++- sebs/aws/triggers.py | 161 +++++++- sebs/azure/__init__.py | 39 ++ sebs/azure/azure.py | 339 +++++++++++++--- sebs/azure/blob_storage.py | 193 +++++++-- sebs/azure/cli.py | 148 +++++-- sebs/azure/config.py | 499 ++++++++++++++++++++---- sebs/azure/cosmosdb.py | 51 +++ sebs/azure/function.py | 68 +++- sebs/azure/triggers.py | 115 +++++- sebs/cache.py | 365 +++++++++++++---- sebs/config.py | 173 +++++++- sebs/experiments/environment.py | 146 ++++++- sebs/experiments/eviction_model.py | 61 ++- sebs/experiments/experiment.py | 3 +- sebs/experiments/invocation_overhead.py | 83 +++- sebs/experiments/network_ping_pong.py | 8 +- sebs/experiments/perf_cost.py | 12 +- sebs/experiments/result.py | 10 +- sebs/experiments/startup_time.py | 81 +++- sebs/gcp/__init__.py | 41 ++ sebs/gcp/cli.py | 103 +++-- sebs/gcp/config.py | 284 +++++++++++--- sebs/gcp/datastore.py | 180 ++++++++- sebs/gcp/function.py | 79 +++- sebs/gcp/gcp.py | 427 ++++++++++++++++---- sebs/gcp/resources.py | 101 ++++- sebs/gcp/storage.py | 137 ++++++- sebs/gcp/triggers.py | 167 +++++++- sebs/local/__init__.py | 16 + sebs/local/config.py | 156 +++++++- sebs/local/deployment.py | 97 ++++- sebs/local/function.py | 141 ++++++- sebs/local/local.py | 294 ++++++++++++-- sebs/local/measureMem.py | 73 +++- 
sebs/openwhisk/__init__.py | 26 ++ sebs/openwhisk/config.py | 240 +++++++++++- sebs/openwhisk/container.py | 74 +++- sebs/openwhisk/function.py | 119 +++++- sebs/openwhisk/openwhisk.py | 256 +++++++++++- sebs/openwhisk/triggers.py | 201 +++++++++- sebs/statistics.py | 10 +- sebs/storage/__init__.py | 33 ++ sebs/storage/config.py | 179 ++++++++- sebs/storage/minio.py | 40 +- sebs/storage/resources.py | 162 +++++++- sebs/storage/scylladb.py | 313 ++++++++++++--- 53 files changed, 6707 insertions(+), 858 deletions(-) diff --git a/sebs/aws/__init__.py b/sebs/aws/__init__.py index 44df1200..236041cf 100644 --- a/sebs/aws/__init__.py +++ b/sebs/aws/__init__.py @@ -1,3 +1,26 @@ +"""AWS module for the Serverless Benchmarking Suite (SeBS). + +This module provides the AWS implementation of the SeBS framework, enabling +deployment and management of serverless functions on AWS Lambda. It includes +comprehensive support for AWS services including Lambda, S3, DynamoDB, ECR, +and API Gateway. + +Key components: + AWS: Main AWS system implementation + LambdaFunction: AWS Lambda function representation + AWSConfig: AWS-specific configuration management + S3: S3 storage implementation + +The module handles AWS-specific functionality including: +- Lambda function deployment and management +- Container deployments via ECR +- S3 storage for code packages and data +- DynamoDB NoSQL storage +- API Gateway HTTP triggers +- IAM role management +- CloudWatch metrics collection +""" + from .aws import AWS, LambdaFunction # noqa from .config import AWSConfig # noqa from .s3 import S3 # noqa diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 2aa0f5a2..4e2aaf36 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -402,7 +402,15 @@ def create_function( return lambda_function - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Set up triggers for a cached function. + + Configures triggers for a function that was loaded from cache, + ensuring they have proper logging handlers and deployment client references. + + Args: + function: Function instance to configure triggers for + """ from sebs.aws.triggers import LibraryTrigger @@ -477,7 +485,21 @@ def update_function( def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} - ): + ) -> None: + """Update Lambda function configuration. + + Updates the function's timeout, memory, and environment variables. + Automatically adds environment variables for NoSQL storage table names + if the benchmark uses NoSQL storage. + + Args: + function: Function to update + code_package: Benchmark code package with configuration + env_variables: Additional environment variables to set + + Raises: + AssertionError: If code package input has not been processed + """ # We can only update storage configuration once it has been processed for this benchmark assert code_package.has_input_processed @@ -523,6 +545,19 @@ def update_function_configuration( def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate default function name for a benchmark. + + Creates a standardized function name based on resource ID, benchmark name, + language, version, and architecture. Ensures the name is compatible with + AWS Lambda naming requirements. 
+ + Args: + code_package: Benchmark code package + resources: Optional resources object (uses default if not provided) + + Returns: + str: Formatted function name suitable for AWS Lambda + """ # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id func_name = "sebs-{}-{}-{}-{}-{}".format( @@ -538,47 +573,59 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name for AWS Lambda compatibility. + + AWS Lambda has specific naming requirements. This method ensures + the function name complies with AWS Lambda naming rules. + + Args: + func_name: Raw function name + + Returns: + str: Formatted function name with illegal characters replaced + """ # AWS Lambda does not allow hyphens in function names func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - FIXME: does not clean the cache - """ - - def delete_function(self, func_name: Optional[str]): + def delete_function(self, func_name: Optional[str]) -> None: + """Delete an AWS Lambda function. + + Args: + func_name: Name of the function to delete + + Note: + FIXME: does not clean the cache + """ self.logging.debug("Deleting function {}".format(func_name)) try: self.client.delete_function(FunctionName=func_name) except Exception: self.logging.debug("Function {} does not exist!".format(func_name)) - """ - Prepare AWS resources to store experiment results. - Allocate one bucket. - - :param benchmark: benchmark name - :return: name of bucket to store experiment results - """ - - # def prepare_experiment(self, benchmark: str): - # logs_bucket = self.get_storage().add_output_bucket(benchmark, suffix="logs") - # return logs_bucket - - """ - Accepts AWS report after function invocation. - Returns a dictionary filled with values with various metrics such as - time, invocation time and memory consumed. - - :param log: decoded log from CloudWatch or from synchronuous invocation - :return: dictionary with parsed values - """ - @staticmethod def parse_aws_report( log: str, requests: Union[ExecutionResult, Dict[str, ExecutionResult]] ) -> str: + """Parse AWS Lambda execution report from CloudWatch logs. + + Extracts execution metrics from AWS Lambda log entries and updates + the corresponding ExecutionResult objects with timing, memory, and + billing information. + + Args: + log: Raw log string from CloudWatch or synchronous invocation + requests: Either a single ExecutionResult or dictionary mapping + request IDs to ExecutionResult objects + + Returns: + str: Request ID of the parsed execution + + Example: + The log format expected is tab-separated AWS Lambda report format: + "REPORT RequestId: abc123\tDuration: 100.00 ms\tBilled Duration: 100 ms\t..." + """ aws_vals = {} for line in log.split("\t"): if not line.isspace(): @@ -605,9 +652,23 @@ def parse_aws_report( return request_id def shutdown(self) -> None: + """Shutdown the AWS system and clean up resources. + + Calls the parent shutdown method to perform standard cleanup. + """ super().shutdown() - def get_invocation_error(self, function_name: str, start_time: int, end_time: int): + def get_invocation_error(self, function_name: str, start_time: int, end_time: int) -> None: + """Retrieve and log invocation errors from CloudWatch Logs. + + Queries CloudWatch Logs for error messages during the specified time range + and logs them for debugging purposes. 
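# Minimal sketch of the REPORT-line parsing performed by parse_aws_report:
# fields are tab-separated "key: value" pairs. The sample line is illustrative
# and only shows a subset of the real report fields.
def _parse_report(log: str) -> dict:
    values = {}
    for entry in log.split("\t"):
        if not entry.isspace() and ":" in entry:
            key, value = entry.split(":", maxsplit=1)
            values[key.strip()] = value.strip()
    return values


_sample = "REPORT RequestId: abc123\tDuration: 100.00 ms\tBilled Duration: 100 ms"
# _parse_report(_sample) -> {"REPORT RequestId": "abc123",
#                            "Duration": "100.00 ms", "Billed Duration": "100 ms"}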
+ + Args: + function_name: Name of the Lambda function + start_time: Start time for log query (Unix timestamp) + end_time: End time for log query (Unix timestamp) + """ if not self.logs_client: self.logs_client = boto3.client( service_name="logs", @@ -650,7 +711,19 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """Download execution metrics from CloudWatch Logs. + + Queries CloudWatch Logs for Lambda execution reports and parses them + to extract performance metrics for each request. + + Args: + function_name: Name of the Lambda function + start_time: Start time for metrics collection (Unix timestamp) + end_time: End time for metrics collection (Unix timestamp) + requests: Dictionary mapping request IDs to ExecutionResult objects + metrics: Dictionary to store collected metrics + """ if not self.logs_client: self.logs_client = boto3.client( @@ -694,6 +767,21 @@ def download_metrics( ) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for the specified function. + + Creates and configures a trigger based on the specified type. Currently + supports HTTP triggers (via API Gateway) and library triggers. + + Args: + func: Function to create trigger for + trigger_type: Type of trigger to create (HTTP or LIBRARY) + + Returns: + Trigger: The created trigger instance + + Raises: + RuntimeError: If trigger type is not supported + """ from sebs.aws.triggers import HTTPTrigger function = cast(LambdaFunction, func) @@ -728,13 +816,31 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T self.cache_client.update_function(function) return trigger - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: + """Enforce cold start for a single function. + + Updates the function's environment variables to force a cold start + on the next invocation. + + Args: + function: Function to enforce cold start for + code_package: Benchmark code package with configuration + """ func = cast(LambdaFunction, function) self.update_function_configuration( func, code_package, {"ForceColdStart": str(self.cold_start_counter)} ) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for multiple functions. + + Updates all specified functions to force cold starts on their next invocations. + This is useful for ensuring consistent performance measurements. + + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark code package with configuration + """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -744,19 +850,40 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.wait_function_updated(lambda_function) self.logging.info("Finished function updates enforcing cold starts.") - def wait_function_active(self, func: LambdaFunction): + def wait_function_active(self, func: LambdaFunction) -> None: + """Wait for Lambda function to become active after creation. + + Uses AWS Lambda waiter to wait until the function is in Active state + and ready to be invoked. 
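# Hedged sketch of the cold-start enforcement idea used by enforce_cold_start:
# changing an environment variable forces AWS Lambda to create fresh execution
# environments on the next invocation. The function name and counter are
# placeholders; SeBS routes this through update_function_configuration, which
# also preserves the function's other settings.
import boto3


def _force_cold_start(function_name: str, counter: int, region: str = "us-east-1") -> None:
    client = boto3.client("lambda", region_name=region)
    client.update_function_configuration(
        FunctionName=function_name,
        Environment={"Variables": {"ForceColdStart": str(counter)}},
    )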
+ + Args: + func: Lambda function to wait for + """ self.logging.info("Waiting for Lambda function to be created...") waiter = self.client.get_waiter("function_active_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been created.") - def wait_function_updated(self, func: LambdaFunction): + def wait_function_updated(self, func: LambdaFunction) -> None: + """Wait for Lambda function to complete update process. + + Uses AWS Lambda waiter to wait until the function update is complete + and the function is ready to be invoked with new configuration. + + Args: + func: Lambda function to wait for + """ self.logging.info("Waiting for Lambda function to be updated...") waiter = self.client.get_waiter("function_updated_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been updated.") - def disable_rich_output(self): + def disable_rich_output(self) -> None: + """Disable rich output formatting for ECR operations. + + Disables colored/formatted output in the ECR container client, + useful for CI/CD environments or when plain text output is preferred. + """ self.ecr_client.disable_rich_output = True diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 2d05e842..527c8bc9 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -1,3 +1,16 @@ +"""Configuration management for AWS SeBS integration. + +This module provides configuration classes for AWS credentials, resources, and settings +used by the Serverless Benchmarking Suite when deploying to AWS Lambda. It handles +AWS authentication, resource management including ECR repositories, IAM roles, and +HTTP APIs, along with caching and serialization capabilities. + +Key classes: + AWSCredentials: Manages AWS access credentials and account information + AWSResources: Manages AWS resources like ECR repositories, IAM roles, and HTTP APIs + AWSConfig: Main configuration container combining credentials and resources +""" + import base64 import json import os @@ -14,7 +27,27 @@ class AWSCredentials(Credentials): - def __init__(self, access_key: str, secret_key: str): + """AWS authentication credentials for SeBS. + + This class manages AWS access credentials including access key, secret key, + and automatically retrieves the associated AWS account ID through STS. + + Attributes: + _access_key: AWS access key ID + _secret_key: AWS secret access key + _account_id: AWS account ID retrieved via STS + """ + + def __init__(self, access_key: str, secret_key: str) -> None: + """Initialize AWS credentials. + + Args: + access_key: AWS access key ID + secret_key: AWS secret access key + + Raises: + ClientError: If AWS credentials are invalid or STS call fails + """ super().__init__() self._access_key = access_key @@ -27,27 +60,73 @@ def __init__(self, access_key: str, secret_key: str): @staticmethod def typename() -> str: + """Get the type name for these credentials. + + Returns: + str: The type name 'AWS.Credentials' + """ return "AWS.Credentials" @property def access_key(self) -> str: + """Get the AWS access key ID. + + Returns: + str: AWS access key ID + """ return self._access_key @property def secret_key(self) -> str: + """Get the AWS secret access key. + + Returns: + str: AWS secret access key + """ return self._secret_key @property def account_id(self) -> str: + """Get the AWS account ID. + + Returns: + str: AWS account ID + """ return self._account_id @staticmethod def initialize(dct: dict) -> "AWSCredentials": + """Initialize AWS credentials from a dictionary. 
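# Hedged sketch of how the account ID is resolved in AWSCredentials.__init__:
# an STS client built from the access/secret key answers get_caller_identity().
# The credential values are placeholders; running this requires valid keys.
import boto3


def _resolve_account_id(access_key: str, secret_key: str) -> str:
    sts = boto3.client(
        "sts", aws_access_key_id=access_key, aws_secret_access_key=secret_key
    )
    return sts.get_caller_identity()["Account"]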
+ + Args: + dct: Dictionary containing 'access_key' and 'secret_key' + + Returns: + AWSCredentials: Initialized credentials object + + Raises: + KeyError: If required keys are missing from dictionary + """ return AWSCredentials(dct["access_key"], dct["secret_key"]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: - + """Deserialize AWS credentials from configuration and cache. + + Loads AWS credentials from configuration file, environment variables, or cache. + Validates that credentials match cached account ID if available. + + Args: + config: Configuration dictionary that may contain credentials + cache: Cache instance for retrieving/storing credentials + handlers: Logging handlers for error reporting + + Returns: + Credentials: Deserialized AWSCredentials instance + + Raises: + RuntimeError: If credentials are missing or don't match cached account + """ # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations cached_config = cache.get_config("aws") @@ -85,33 +164,96 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden ret.logging_handlers = handlers return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current credentials. + + Args: + cache: Cache instance to update + """ cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) def serialize(self) -> dict: + """Serialize credentials to a dictionary. + + Returns: + dict: Dictionary containing account_id + """ out = {"account_id": self._account_id} return out class AWSResources(Resources): + """AWS resource management for SeBS. + + This class manages AWS-specific resources including ECR repositories, + IAM roles, HTTP APIs, and Docker registry configurations. It provides + methods for creating and managing these resources with caching support. + + Attributes: + _docker_registry: Docker registry URL (ECR repository URI) + _docker_username: Docker registry username + _docker_password: Docker registry password + _container_repository: ECR repository name + _lambda_role: IAM role ARN for Lambda execution + _http_apis: Dictionary of HTTP API configurations + """ + class HTTPApi: - def __init__(self, arn: str, endpoint: str): + """HTTP API configuration for AWS API Gateway. + + Represents an HTTP API resource in AWS API Gateway with its ARN and endpoint. + + Attributes: + _arn: API Gateway ARN + _endpoint: API Gateway endpoint URL + """ + + def __init__(self, arn: str, endpoint: str) -> None: + """Initialize HTTP API configuration. + + Args: + arn: API Gateway ARN + endpoint: API Gateway endpoint URL + """ self._arn = arn self._endpoint = endpoint @property def arn(self) -> str: + """Get the API Gateway ARN. + + Returns: + str: API Gateway ARN + """ return self._arn @property def endpoint(self) -> str: + """Get the API Gateway endpoint URL. + + Returns: + str: API Gateway endpoint URL + """ return self._endpoint @staticmethod def deserialize(dct: dict) -> "AWSResources.HTTPApi": + """Deserialize HTTP API from dictionary. + + Args: + dct: Dictionary containing 'arn' and 'endpoint' + + Returns: + AWSResources.HTTPApi: Deserialized HTTP API instance + """ return AWSResources.HTTPApi(dct["arn"], dct["endpoint"]) def serialize(self) -> dict: + """Serialize HTTP API to dictionary. 
+ + Returns: + dict: Dictionary containing arn and endpoint + """ out = {"arn": self.arn, "endpoint": self.endpoint} return out @@ -120,7 +262,14 @@ def __init__( registry: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, - ): + ) -> None: + """Initialize AWS resources. + + Args: + registry: Docker registry URL (ECR repository URI) + username: Docker registry username + password: Docker registry password + """ super().__init__(name="aws") self._docker_registry: Optional[str] = registry if registry != "" else None self._docker_username: Optional[str] = username if username != "" else None @@ -131,25 +280,65 @@ def __init__( @staticmethod def typename() -> str: + """Get the type name for these resources. + + Returns: + str: The type name 'AWS.Resources' + """ return "AWS.Resources" @property def docker_registry(self) -> Optional[str]: + """Get the Docker registry URL. + + Returns: + Optional[str]: Docker registry URL (ECR repository URI) + """ return self._docker_registry @property def docker_username(self) -> Optional[str]: + """Get the Docker registry username. + + Returns: + Optional[str]: Docker registry username + """ return self._docker_username @property def docker_password(self) -> Optional[str]: + """Get the Docker registry password. + + Returns: + Optional[str]: Docker registry password + """ return self._docker_password @property def container_repository(self) -> Optional[str]: + """Get the ECR repository name. + + Returns: + Optional[str]: ECR repository name + """ return self._container_repository def lambda_role(self, boto3_session: boto3.session.Session) -> str: + """Get or create IAM role for Lambda execution. + + Creates a Lambda execution role with S3 and basic execution permissions + if it doesn't already exist. The role allows Lambda functions to access + S3 and write CloudWatch logs. + + Args: + boto3_session: Boto3 session for AWS API calls + + Returns: + str: Lambda execution role ARN + + Raises: + ClientError: If IAM operations fail + """ if not self._lambda_role: iam_client = boto3_session.client(service_name="iam") trust_policy = { @@ -190,6 +379,23 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: def http_api( self, api_name: str, func: LambdaFunction, boto3_session: boto3.session.Session ) -> "AWSResources.HTTPApi": + """Get or create HTTP API for Lambda function. + + Creates an HTTP API Gateway that routes requests to the specified Lambda function. + If the API already exists, returns the cached instance. + + Args: + api_name: Name of the HTTP API + func: Lambda function to route requests to + boto3_session: Boto3 session for AWS API calls + + Returns: + AWSResources.HTTPApi: HTTP API configuration + + Raises: + RuntimeError: If API creation fails after retries + TooManyRequestsException: If API Gateway rate limits are exceeded + """ http_api = self._http_apis.get(api_name) if not http_api: @@ -245,6 +451,18 @@ def http_api( def check_ecr_repository_exists( self, ecr_client: ECRClient, repository_name: str ) -> Optional[str]: + """Check if ECR repository exists. 
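# Hedged sketch of the IAM trust policy referenced in lambda_role(): it allows
# the Lambda service to assume the execution role. This is the standard policy
# document shape; the role name in the example is illustrative, not the one
# SeBS creates.
import json

_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "lambda.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}
_trust_policy_json = json.dumps(_trust_policy)

# Passed to IAM as a JSON string, e.g.:
# iam_client.create_role(RoleName="sebs-lambda-role",
#                        AssumeRolePolicyDocument=_trust_policy_json)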
+ + Args: + ecr_client: ECR client instance + repository_name: Name of the ECR repository + + Returns: + Optional[str]: Repository URI if exists, None otherwise + + Raises: + Exception: If ECR operation fails (other than RepositoryNotFound) + """ try: resp = ecr_client.describe_repositories(repositoryNames=[repository_name]) return resp["repositories"][0]["repositoryUri"] @@ -255,6 +473,20 @@ def check_ecr_repository_exists( raise e def get_ecr_repository(self, ecr_client: ECRClient) -> str: + """Get or create ECR repository for container deployments. + + Creates an ECR repository with a unique name based on the resource ID + if it doesn't already exist. Updates the docker_registry property. + + Args: + ecr_client: ECR client instance + + Returns: + str: ECR repository name + + Raises: + ClientError: If ECR operations fail + """ if self._container_repository is not None: return self._container_repository @@ -281,6 +513,21 @@ def get_ecr_repository(self, ecr_client: ECRClient) -> str: return self._container_repository def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, str]: + """Get ECR repository authorization credentials. + + Retrieves temporary authorization token from ECR and extracts + username and password for Docker registry authentication. + + Args: + ecr_client: ECR client instance + + Returns: + Tuple[str, str, str]: Username, password, and registry URL + + Raises: + AssertionError: If username or registry are None + ClientError: If ECR authorization fails + """ if self._docker_password is None: response = ecr_client.get_authorization_token() @@ -295,7 +542,16 @@ def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, return self._docker_username, self._docker_password, self._docker_registry @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: dict) -> "AWSResources": + """Initialize AWS resources from dictionary. + + Args: + res: Base Resources instance to initialize + dct: Dictionary containing resource configuration + + Returns: + AWSResources: Initialized AWS resources instance + """ ret = cast(AWSResources, res) super(AWSResources, AWSResources).initialize(ret, dct) @@ -313,6 +569,11 @@ def initialize(res: Resources, dct: dict): return ret def serialize(self) -> dict: + """Serialize AWS resources to dictionary. + + Returns: + dict: Serialized resource configuration + """ out = { **super().serialize(), "lambda-role": self._lambda_role, @@ -325,7 +586,12 @@ def serialize(self) -> dict: } return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current resource configuration. + + Args: + cache: Cache instance to update + """ super().update_cache(cache) cache.update_config( val=self.docker_registry, keys=["aws", "resources", "docker", "registry"] @@ -342,6 +608,16 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Deserialize AWS resources from configuration and cache. 
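# Hedged sketch of the existence check in check_ecr_repository_exists: ask ECR
# for the repository and treat RepositoryNotFoundException as "does not exist".
# The region and repository name are placeholders.
from typing import Optional

import boto3


def _ecr_repository_uri(repository_name: str, region: str = "us-east-1") -> Optional[str]:
    ecr = boto3.client("ecr", region_name=region)
    try:
        resp = ecr.describe_repositories(repositoryNames=[repository_name])
        return resp["repositories"][0]["repositoryUri"]
    except ecr.exceptions.RepositoryNotFoundException:
        return None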
+ + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached resources + handlers: Logging handlers for status messages + + Returns: + Resources: Deserialized AWSResources instance + """ ret = AWSResources() cached_config = cache.get_config("aws") @@ -365,32 +641,81 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AWSConfig(Config): - def __init__(self, credentials: AWSCredentials, resources: AWSResources): + """Main AWS configuration container. + + Combines AWS credentials and resources into a single configuration object + for use by the AWS SeBS implementation. + + Attributes: + _credentials: AWS authentication credentials + _resources: AWS resource management configuration + """ + + def __init__(self, credentials: AWSCredentials, resources: AWSResources) -> None: + """Initialize AWS configuration. + + Args: + credentials: AWS authentication credentials + resources: AWS resource management configuration + """ super().__init__(name="aws") self._credentials = credentials self._resources = resources @staticmethod def typename() -> str: + """Get the type name for this configuration. + + Returns: + str: The type name 'AWS.Config' + """ return "AWS.Config" @property def credentials(self) -> AWSCredentials: + """Get AWS credentials. + + Returns: + AWSCredentials: AWS authentication credentials + """ return self._credentials @property def resources(self) -> AWSResources: + """Get AWS resources configuration. + + Returns: + AWSResources: AWS resource management configuration + """ return self._resources # FIXME: use future annotations (see sebs/faas/system) @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize AWS configuration from dictionary. + + Args: + cfg: Base Config instance to initialize + dct: Dictionary containing 'region' configuration + """ config = cast(AWSConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: - + """Deserialize AWS configuration from config and cache. + + Creates an AWSConfig instance by deserializing credentials and resources, + then loading region configuration from cache or user-provided config. + + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached configuration + handlers: Logging handlers for status messages + + Returns: + Config: Deserialized AWSConfig instance + """ cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) @@ -408,19 +733,25 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.region = config_obj.region return config_obj - """ - Update the contents of the user cache. + def update_cache(self, cache: Cache) -> None: + """Update the contents of the user cache. + The changes are directly written to the file system. - - Update values: region. - """ - - def update_cache(self, cache: Cache): + Updates region, credentials, and resources in the cache. + + Args: + cache: Cache instance to update + """ cache.update_config(val=self.region, keys=["aws", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """Serialize AWS configuration to dictionary. 
+ + Returns: + dict: Serialized configuration including name, region, credentials, and resources + """ out = { "name": "aws", "region": self._region, diff --git a/sebs/aws/container.py b/sebs/aws/container.py index e7c2cbe6..74d536a6 100644 --- a/sebs/aws/container.py +++ b/sebs/aws/container.py @@ -1,3 +1,14 @@ +"""AWS ECR container management for SeBS. + +This module provides the ECRContainer class which handles Docker container +operations for AWS Lambda deployments using Amazon Elastic Container Registry (ECR). +It extends the base DockerContainer class with AWS-specific functionality for +image registry operations. + +Key classes: + ECRContainer: AWS ECR-specific container management +""" + import docker from typing import Tuple @@ -11,12 +22,33 @@ class ECRContainer(DockerContainer): + """AWS ECR container management for SeBS. + + This class handles Docker container operations specifically for AWS Lambda + deployments using Amazon Elastic Container Registry (ECR). It provides + functionality for building, tagging, and pushing container images to ECR. + + Attributes: + ecr_client: AWS ECR client for registry operations + config: AWS-specific configuration + """ + @staticmethod - def name(): + def name() -> str: + """Get the name of this container system. + + Returns: + str: System name ('aws') + """ return "aws" @staticmethod def typename() -> str: + """Get the type name of this container system. + + Returns: + str: Type name ('AWS.ECRContainer') + """ return "AWS.ECRContainer" def __init__( @@ -25,20 +57,45 @@ def __init__( session: boto3.session.Session, config: AWSConfig, docker_client: docker.client.DockerClient, - ): - + ) -> None: + """Initialize ECR container manager. + + Args: + system_config: SeBS system configuration + session: AWS boto3 session + config: AWS-specific configuration + docker_client: Docker client for local operations + """ super().__init__(system_config, docker_client) self.ecr_client = session.client(service_name="ecr", region_name=config.region) self.config = config @property def client(self) -> ECRClient: + """Get the ECR client. + + Returns: + ECRClient: AWS ECR client for registry operations + """ return self.ecr_client def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: - + """Generate ECR registry details for a benchmark image. + + Creates the registry name, repository name, image tag, and full image URI + for a specific benchmark configuration. + + Args: + benchmark: Name of the benchmark + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x64', 'arm64') + + Returns: + Tuple[str, str, str, str]: Registry name, repository name, image tag, and image URI + """ account_id = self.config.credentials.account_id region = self.config.region registry_name = f"{account_id}.dkr.ecr.{region}.amazonaws.com" @@ -51,7 +108,16 @@ def registry_name( return registry_name, repository_name, image_tag, image_uri - def find_image(self, repository_name, image_tag) -> bool: + def find_image(self, repository_name: str, image_tag: str) -> bool: + """Check if an image exists in the ECR repository. 
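+
+        Uses ``describe_images`` on the repository and interprets a failed
+        lookup as the image being absent.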
+ + Args: + repository_name: Name of the ECR repository + image_tag: Tag of the image to search for + + Returns: + bool: True if the image exists, False otherwise + """ try: response = self.ecr_client.describe_images( repositoryName=repository_name, imageIds=[{"imageTag": image_tag}] @@ -63,8 +129,19 @@ def find_image(self, repository_name, image_tag) -> bool: return False - def push_image(self, repository_uri, image_tag): - + def push_image(self, repository_uri: str, image_tag: str) -> None: + """Push a Docker image to ECR. + + Authenticates with ECR using temporary credentials and pushes the + specified image to the repository. + + Args: + repository_uri: URI of the ECR repository + image_tag: Tag of the image to push + + Raises: + RuntimeError: If the push operation fails + """ username, password, registry_url = self.config.resources.ecr_repository_authorization( self.client ) diff --git a/sebs/aws/dynamodb.py b/sebs/aws/dynamodb.py index 0f3cc878..c0d9aff0 100644 --- a/sebs/aws/dynamodb.py +++ b/sebs/aws/dynamodb.py @@ -1,3 +1,13 @@ +"""AWS DynamoDB NoSQL storage implementation for SeBS. + +This module provides the DynamoDB class which implements NoSQL storage functionality +for the Serverless Benchmarking Suite using Amazon DynamoDB. It handles table +creation, data operations, and caching for benchmark data storage. + +Key classes: + DynamoDB: AWS DynamoDB NoSQL storage implementation +""" + from collections import defaultdict from typing import Dict, Optional, Tuple @@ -10,12 +20,34 @@ class DynamoDB(NoSQLStorage): + """AWS DynamoDB NoSQL storage implementation for SeBS. + + This class provides NoSQL storage functionality using Amazon DynamoDB. + It handles table creation, data operations, caching, and provides a + unified interface for benchmark data storage. + + Attributes: + client: DynamoDB client for AWS API operations + _tables: Mapping of benchmark names to table configurations + _serializer: DynamoDB type serializer for data conversion + """ + @staticmethod def typename() -> str: + """Get the type name for this storage system. + + Returns: + str: Type name ('AWS.DynamoDB') + """ return "AWS.DynamoDB" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage system. + + Returns: + str: Deployment name ('aws') + """ return "aws" def __init__( @@ -26,7 +58,17 @@ def __init__( region: str, access_key: str, secret_key: str, - ): + ) -> None: + """Initialize DynamoDB NoSQL storage. + + Args: + session: AWS boto3 session + cache_client: Cache client for storing table configurations + resources: Cloud resource configuration + region: AWS region name + access_key: AWS access key ID + secret_key: AWS secret access key + """ super().__init__(region, cache_client, resources) self.client = session.client( "dynamodb", @@ -42,7 +84,14 @@ def __init__( self._serializer = TypeSerializer() def retrieve_cache(self, benchmark: str) -> bool: - + """Retrieve table configuration from cache. + + Args: + benchmark: Name of the benchmark + + Returns: + bool: True if cache was found and loaded, False otherwise + """ if benchmark in self._tables: return True @@ -53,8 +102,12 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): - + def update_cache(self, benchmark: str) -> None: + """Update cache with current table configuration. 
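+
+        Persists the benchmark's table mapping through the cache client so
+        that ``retrieve_cache`` can restore it in later runs.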
+ + Args: + benchmark: Name of the benchmark to update cache for + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, @@ -64,10 +117,26 @@ def update_cache(self, benchmark: str): ) def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table mappings for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Mapping of logical table names to actual DynamoDB table names + """ return self._tables[benchmark] def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: - + """Get the actual DynamoDB table name for a logical table. + + Args: + benchmark: Name of the benchmark + table: Logical table name used by the benchmark + + Returns: + Optional[str]: Actual DynamoDB table name, or None if not found + """ if benchmark not in self._tables: return None @@ -83,8 +152,19 @@ def write_to_table( data: dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): - + ) -> None: + """Write data to a DynamoDB table. + + Args: + benchmark: Name of the benchmark + table: Logical table name + data: Data to write to the table + primary_key: Primary key as (attribute_name, value) tuple + secondary_key: Optional secondary key as (attribute_name, value) tuple + + Raises: + AssertionError: If the table name is not found + """ table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -95,16 +175,27 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ - def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """Create a DynamoDB table for benchmark data. + + Creates a DynamoDB table with a unique name for the benchmark. Unlike + Azure (account -> database -> container) and GCP (database per benchmark), + AWS requires unique table names across the account. + + Args: + benchmark: Name of the benchmark + name: Logical table name + primary_key: Name of the primary key attribute + secondary_key: Optional name of the secondary key attribute + + Returns: + str: Name of the created table + + Raises: + RuntimeError: If table creation fails for unknown reasons + """ table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" @@ -169,7 +260,29 @@ def create_table( raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}") def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + str: Result of the operation + + Raises: + NotImplementedError: This operation is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + str: Result of the operation + + Raises: + NotImplementedError: This operation is not yet implemented + """ raise NotImplementedError() diff --git a/sebs/aws/resources.py b/sebs/aws/resources.py index 5913c392..f8048d50 100644 --- a/sebs/aws/resources.py +++ b/sebs/aws/resources.py @@ -1,3 +1,13 @@ +"""AWS system resources management for SeBS. 
+ +This module provides the AWSSystemResources class which manages AWS-specific +resources like S3 storage and DynamoDB NoSQL storage within the SeBS framework. +It handles initialization, caching, and provides access to AWS services. + +Key classes: + AWSSystemResources: Main resource manager for AWS services +""" + from typing import cast, Optional from sebs.aws.s3 import S3 @@ -14,12 +24,35 @@ class AWSSystemResources(SystemResources): + """AWS system resources manager for SeBS. + + This class manages AWS-specific resources including S3 storage and DynamoDB + NoSQL storage. It provides a unified interface for accessing AWS services + with proper session management and caching. + + Attributes: + _session: AWS boto3 session for API calls + _logging_handlers: Logging configuration handlers + _storage: S3 storage client instance + _nosql_storage: DynamoDB NoSQL storage client instance + """ + @staticmethod def typename() -> str: + """Get the type name for these resources. + + Returns: + str: The type name 'AWS.SystemResources' + """ return "AWS.SystemResources" @property def config(self) -> AWSConfig: + """Get the AWS configuration. + + Returns: + AWSConfig: AWS-specific configuration + """ return cast(AWSConfig, self._config) def __init__( @@ -28,7 +61,15 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize AWS system resources. + + Args: + config: AWS-specific configuration + cache_client: Cache client for resource caching + docker_client: Docker client for container operations + logger_handlers: Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._session: Optional[boto3.session.Session] = None @@ -36,19 +77,29 @@ def __init__( self._storage: Optional[S3] = None self._nosql_storage: Optional[DynamoDB] = None - def initialize_session(self, session: boto3.session.Session): + def initialize_session(self, session: boto3.session.Session) -> None: + """Initialize the AWS boto3 session. + + Args: + session: Boto3 session to use for AWS API calls + """ self._session = session - """ - Create a client instance for cloud storage. When benchmark and buckets - parameters are passed, then storage is initialized with required number - of buckets. Buckets may be created or retrieved from cache. - - :param replace_existing: replace existing files in cached buckets? - :return: storage client - """ - def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """Get or create S3 storage client. + + Creates a client instance for S3 cloud storage. Storage is initialized + with required buckets that may be created or retrieved from cache. + + Args: + replace_existing: Whether to replace existing files in cached buckets + + Returns: + PersistentStorage: S3 storage client instance + + Raises: + AssertionError: If session has not been initialized + """ if not self._storage: assert self._session is not None @@ -68,6 +119,17 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """Get or create DynamoDB NoSQL storage client. + + Creates a client instance for DynamoDB NoSQL storage. The client + is configured with AWS credentials and region from the system config. 
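+        The instance is created lazily on the first call and reused afterwards.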
+ + Returns: + NoSQLStorage: DynamoDB NoSQL storage client instance + + Raises: + AssertionError: If session has not been initialized + """ if not self._nosql_storage: assert self._session is not None self.logging.info("Initialize DynamoDB NoSQL instance.") diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 79ca8905..82ebe836 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -1,3 +1,13 @@ +"""AWS S3 storage implementation for SeBS. + +This module provides the S3 class which implements persistent storage functionality +for the Serverless Benchmarking Suite using Amazon S3. It handles bucket creation, +file upload/download operations, and caching for benchmark data storage. + +Key classes: + S3: AWS S3 persistent storage implementation +""" + import os import uuid from typing import List, Optional @@ -10,20 +20,51 @@ class S3(PersistentStorage): + """AWS S3 persistent storage implementation for SeBS. + + This class provides persistent storage functionality using Amazon S3. + It handles bucket creation, file operations, and provides a unified + interface for benchmark data storage and retrieval. + + Attributes: + client: S3 client for AWS API operations + cached: Whether bucket configurations are cached + """ + @staticmethod def typename() -> str: + """Get the type name for this storage system. + + Returns: + str: Type name ('AWS.S3') + """ return "AWS.S3" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage system. + + Returns: + str: Deployment name ('aws') + """ return "aws" @property def replace_existing(self) -> bool: + """Get whether to replace existing files. + + Returns: + bool: True if existing files should be replaced, False otherwise + """ return self._replace_existing @replace_existing.setter - def replace_existing(self, val: bool): + def replace_existing(self, val: bool) -> None: + """Set whether to replace existing files. + + Args: + val: True to replace existing files, False otherwise + """ self._replace_existing = val def __init__( @@ -35,7 +76,18 @@ def __init__( access_key: str, secret_key: str, replace_existing: bool, - ): + ) -> None: + """Initialize S3 persistent storage. + + Args: + session: AWS boto3 session + cache_client: Cache client for storing bucket configurations + resources: Cloud resource configuration + location: AWS region name + access_key: AWS access key ID + secret_key: AWS secret access key + replace_existing: Whether to replace existing files during uploads + """ super().__init__(location, cache_client, resources, replace_existing) self.client = session.client( "s3", @@ -46,11 +98,37 @@ def __init__( self.cached = False def correct_name(self, name: str) -> str: + """Correct bucket name for S3 naming requirements. + + Args: + name: Original bucket name + + Returns: + str: Corrected bucket name (no changes for S3) + """ return name def _create_bucket( self, name: str, buckets: List[str] = [], randomize_name: bool = False ) -> str: + """Create an S3 bucket with the specified name. + + Handles the complex S3 bucket creation logic including region-specific + requirements and conflict resolution. 
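+        Buckets passed in ``buckets`` that already contain the requested name
+        are reused instead of being created again.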
+ + Args: + name: Desired bucket name + buckets: List of existing buckets to check against + randomize_name: Whether to append a random suffix to ensure uniqueness + + Returns: + str: Name of the created bucket + + Raises: + BucketAlreadyExists: If bucket already exists in the same region + ClientError: If bucket creation fails for other reasons + RuntimeError: If bucket already exists in us-east-1 region + """ for bucket_name in buckets: if name in bucket_name: self.logging.info( @@ -98,7 +176,17 @@ def _create_bucket( return bucket_name - def uploader_func(self, path_idx, key, filepath): + def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: + """Upload a file to S3 with caching and replacement logic. + + Handles the upload of benchmark files with appropriate caching behavior + and replacement logic based on configuration. + + Args: + path_idx: Index of the input path configuration + key: S3 object key for the file + filepath: Local path to the file to upload + """ # Skip upload when using cached buckets and not updating storage. if self.cached and not self.replace_existing: return @@ -115,22 +203,53 @@ def uploader_func(self, path_idx, key, filepath): self.upload(bucket_name, filepath, key) - def upload(self, bucket_name: str, filepath: str, key: str): + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to S3. + + Args: + bucket_name: Name of the S3 bucket + filepath: Local path to the file to upload + key: S3 object key for the uploaded file + """ self.logging.info("Upload {} to {}".format(filepath, bucket_name)) self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) - def download(self, bucket_name: str, key: str, filepath: str): + def download(self, bucket_name: str, key: str, filepath: str) -> None: + """Download a file from S3. + + Args: + bucket_name: Name of the S3 bucket + key: S3 object key of the file to download + filepath: Local path where the file should be saved + """ self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: + """Check if an S3 bucket exists and is accessible. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if bucket exists and is accessible, False otherwise + """ try: self.client.head_bucket(Bucket=bucket_name) return True except self.client.exceptions.ClientError: return False - def list_bucket(self, bucket_name: str, prefix: str = ""): + def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """List objects in an S3 bucket with optional prefix filtering. + + Args: + bucket_name: Name of the S3 bucket + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of object keys in the bucket + """ objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: @@ -140,17 +259,38 @@ def list_bucket(self, bucket_name: str, prefix: str = ""): return objects def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List S3 buckets with optional name filtering. 
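+
+        Filtering is performed client-side with a substring match, so, for
+        example, ``list_buckets("benchmarks")`` returns every bucket whose name
+        contains ``benchmarks``.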
+ + Args: + bucket_name: Optional bucket name pattern to filter by + + Returns: + List[str]: List of bucket names + """ s3_buckets = self.client.list_buckets()["Buckets"] if bucket_name is not None: return [bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"]] else: return [bucket["Name"] for bucket in s3_buckets] - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Remove all objects from an S3 bucket. + + Args: + bucket: Name of the bucket to clean + """ objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore - def remove_bucket(self, bucket: str): + def remove_bucket(self, bucket: str) -> None: + """Delete an S3 bucket. + + Args: + bucket: Name of the bucket to delete + + Note: + The bucket must be empty before it can be deleted + """ self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..75ebbcd0 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -1,3 +1,14 @@ +"""AWS trigger implementations for SeBS. + +This module provides trigger implementations for AWS Lambda functions, +including library (direct SDK) triggers and HTTP triggers via API Gateway. +Triggers handle function invocation and result processing. + +Key classes: + LibraryTrigger: Direct Lambda SDK invocation trigger + HTTPTrigger: HTTP API Gateway trigger +""" + import base64 import concurrent.futures import datetime @@ -9,30 +20,80 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + """AWS Lambda library trigger for direct SDK invocation. + + This trigger uses the AWS Lambda SDK to directly invoke Lambda functions. + It provides both synchronous and asynchronous invocation methods with + comprehensive result parsing and error handling. + + Attributes: + name: Name of the Lambda function + _deployment_client: AWS deployment client for Lambda operations + """ + + def __init__(self, fname: str, deployment_client: Optional[AWS] = None) -> None: + """Initialize the library trigger. + + Args: + fname: Name of the Lambda function + deployment_client: AWS deployment client (can be set later) + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name ('AWS.LibraryTrigger') + """ return "AWS.LibraryTrigger" @property def deployment_client(self) -> AWS: + """Get the AWS deployment client. + + Returns: + AWS: AWS deployment client + + Raises: + AssertionError: If deployment client is not set + """ assert self._deployment_client return self._deployment_client @deployment_client.setter - def deployment_client(self, deployment_client: AWS): + def deployment_client(self, deployment_client: AWS) -> None: + """Set the AWS deployment client. + + Args: + deployment_client: AWS deployment client to set + """ self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + Trigger.TriggerType: LIBRARY trigger type + """ return Trigger.TriggerType.LIBRARY def sync_invoke(self, payload: dict) -> ExecutionResult: - + """Synchronously invoke the Lambda function. + + Invokes the Lambda function with the provided payload and waits for + the result. 
Parses AWS-specific metrics and benchmark output. + + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult: Result of the function execution including metrics + """ self.logging.debug(f"Invoke function {self.name}") serialized_payload = json.dumps(payload).encode("utf-8") @@ -67,7 +128,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result - def async_invoke(self, payload: dict): + def async_invoke(self, payload: dict) -> dict: + """Asynchronously invoke the Lambda function. + + Triggers the Lambda function asynchronously without waiting for + the result. Used for fire-and-forget invocations. + + Args: + payload: Dictionary payload to send to the function + + Returns: + dict: AWS Lambda invocation response + + Raises: + RuntimeError: If the async invocation fails + """ # FIXME: proper return type serialized_payload = json.dumps(payload).encode("utf-8") @@ -85,41 +160,113 @@ def async_invoke(self, payload: dict): return ret def serialize(self) -> dict: + """Serialize the trigger to a dictionary. + + Returns: + dict: Serialized trigger configuration + """ return {"type": "Library", "name": self.name} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize a trigger from a dictionary. + + Args: + obj: Dictionary containing trigger configuration + + Returns: + Trigger: Deserialized LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): - def __init__(self, url: str, api_id: str): + """AWS API Gateway HTTP trigger for Lambda functions. + + This trigger uses HTTP requests to invoke Lambda functions through + AWS API Gateway. It provides both synchronous and asynchronous + invocation methods. + + Attributes: + url: API Gateway endpoint URL + api_id: API Gateway API ID + """ + + def __init__(self, url: str, api_id: str) -> None: + """Initialize the HTTP trigger. + + Args: + url: API Gateway endpoint URL + api_id: API Gateway API ID + """ super().__init__() self.url = url self.api_id = api_id @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name ('AWS.HTTPTrigger') + """ return "AWS.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + Trigger.TriggerType: HTTP trigger type + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: - + """Synchronously invoke the function via HTTP. + + Sends an HTTP request to the API Gateway endpoint and waits + for the response. + + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult: Result of the HTTP invocation + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: - + """Asynchronously invoke the function via HTTP. + + Submits the HTTP invocation to a thread pool for asynchronous execution. + + Args: + payload: Dictionary payload to send to the function + + Returns: + concurrent.futures.Future: Future object for the async invocation + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize the trigger to a dictionary. 
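+
+        The result records the endpoint URL and API Gateway ID so that the
+        trigger can later be recreated with ``deserialize``.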
+ + Returns: + dict: Serialized trigger configuration + """ return {"type": "HTTP", "url": self.url, "api-id": self.api_id} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize a trigger from a dictionary. + + Args: + obj: Dictionary containing trigger configuration + + Returns: + Trigger: Deserialized HTTPTrigger instance + """ return HTTPTrigger(obj["url"], obj["api-id"]) diff --git a/sebs/azure/__init__.py b/sebs/azure/__init__.py index 499b1372..394fabb2 100644 --- a/sebs/azure/__init__.py +++ b/sebs/azure/__init__.py @@ -1,3 +1,42 @@ +"""Azure integration package for SeBS benchmarking. + +This package provides comprehensive Azure integration for the Serverless +Benchmarking Suite (SeBS). It includes all necessary components for deploying, +managing, and benchmarking serverless functions on Microsoft Azure. + +Main Components: + Azure: Main system class for Azure platform integration + AzureFunction: Azure Function representation and management + AzureConfig: Configuration management for Azure credentials and resources + BlobStorage: Azure Blob Storage integration for data management + +The package handles: + - Azure Functions deployment and lifecycle management + - Azure Storage integration for benchmark data + - CosmosDB support for NoSQL benchmarks + - Resource group and subscription management + - Azure CLI integration via Docker containers + - Performance metrics collection via Application Insights + +Example: + Basic usage for Azure benchmarking: + + ```python + from sebs.azure import Azure, AzureConfig + + # Load configuration + config = AzureConfig.deserialize(config_dict, cache, handlers) + + # Initialize Azure system + azure = Azure(sebs_config, config, cache, docker_client, handlers) + azure.initialize() + + # Deploy and benchmark functions + function = azure.create_function(code_package, func_name, False, "") + result = function.invoke(payload) + ``` +""" + from .azure import Azure # noqa from .function import AzureFunction # noqa from .config import AzureConfig # noqa diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index dd8cf850..2208c2ce 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -1,3 +1,37 @@ +"""Azure serverless platform implementation for SeBS benchmarking. + +This module provides the Azure implementation of the SeBS serverless +benchmarking system. It handles Azure Functions deployment, resource +management, code packaging, and benchmark execution on Microsoft Azure. + +Key features: + - Azure Functions deployment and management + - Azure Storage integration for code and data + - CosmosDB support for NoSQL benchmarks + - HTTP trigger configuration and invocation + - Performance metrics collection via Application Insights + - Resource lifecycle management + +The main class Azure extends the base System class to provide Azure-specific +functionality for serverless function benchmarking. + +Example: + Basic usage for Azure benchmarking: + + ```python + from sebs.azure.azure import Azure + from sebs.azure.config import AzureConfig + + # Initialize Azure system with configuration + azure_system = Azure(sebs_config, azure_config, cache, docker_client, handlers) + azure_system.initialize() + + # Deploy and benchmark functions + function = azure_system.create_function(code_package, func_name, False, "") + result = function.invoke(payload) + ``` +""" + import datetime import json import re @@ -27,28 +61,62 @@ class Azure(System): + """Azure serverless platform implementation. 
+ + This class implements the Azure-specific functionality for the SeBS + benchmarking suite. It handles Azure Functions deployment, resource + management, and benchmark execution on Microsoft Azure platform. + + Attributes: + logs_client: Azure logs client (currently unused) + storage: BlobStorage instance for Azure Blob Storage operations + cached: Flag indicating if resources are cached + _config: Azure configuration containing credentials and resources + AZURE_RUNTIMES: Mapping of language names to Azure runtime identifiers + """ + logs_client = None storage: BlobStorage - cached = False + cached: bool = False _config: AzureConfig # runtime mapping AZURE_RUNTIMES = {"python": "python", "nodejs": "node"} @staticmethod - def name(): + def name() -> str: + """Get the platform name. + + Returns: + Platform name 'azure'. + """ return "azure" @property def config(self) -> AzureConfig: + """Get Azure configuration. + + Returns: + Azure configuration containing credentials and resources. + """ return self._config @staticmethod def function_type() -> Type[Function]: + """Get the function type for Azure. + + Returns: + AzureFunction class type. + """ return AzureFunction @property def cli_instance(self) -> AzureCLI: + """Get Azure CLI instance. + + Returns: + Azure CLI instance for executing Azure commands. + """ return cast(AzureSystemResources, self._system_resources).cli_instance def __init__( @@ -58,7 +126,16 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize Azure system. + + Args: + sebs_config: SeBS configuration settings + config: Azure-specific configuration + cache_client: Cache for storing function and resource data + docker_client: Docker client for container operations + logger_handlers: Logging handlers for output management + """ super().__init__( sebs_config, cache_client, @@ -68,26 +145,39 @@ def __init__( self.logging_handlers = logger_handlers self._config = config - """ - Start the Docker container running Azure CLI tools. - """ - def initialize( self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None, - ): + ) -> None: + """Initialize Azure system and start CLI container. + + Initializes Azure resources and allocates shared resources like + data storage account. Starts the Docker container with Azure CLI tools. + + Args: + config: Additional configuration parameters + resource_prefix: Optional prefix for resource naming + """ self.initialize_resources(select_prefix=resource_prefix) self.allocate_shared_resource() - def shutdown(self): + def shutdown(self) -> None: + """Shutdown Azure system and cleanup resources. + + Stops the Azure CLI container and performs cleanup of system resources. + """ cast(AzureSystemResources, self._system_resources).shutdown() super().shutdown() def find_deployments(self) -> List[str]: - - """ - Look for duplicated resource groups. + """Find existing SeBS deployments by scanning resource groups. + + Looks for Azure resource groups matching the SeBS naming pattern + to identify existing deployments that can be reused. + + Returns: + List of deployment identifiers found in resource groups. """ resource_groups = self.config.resources.list_resource_groups(self.cli_instance) deployments = [] @@ -99,22 +189,15 @@ def find_deployments(self) -> List[str]: return deployments - """ - Allow multiple deployment clients share the same settings. - Not an ideal situation, but makes regression testing much simpler. 
- """ - - def allocate_shared_resource(self): + def allocate_shared_resource(self) -> None: + """Allocate shared data storage account. + + Creates or retrieves the shared data storage account used for + benchmark input/output data. This allows multiple deployment + clients to share the same storage, simplifying regression testing. + """ self.config.resources.data_storage_account(self.cli_instance) - # Directory structure - # handler - # - source files - # - Azure wrappers - handler, storage - # - additional resources - # - function.json - # host.json - # requirements.txt/package.json def package_code( self, directory: str, @@ -125,6 +208,30 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package function code for Azure Functions deployment. + + Creates the proper directory structure and configuration files + required for Azure Functions deployment. The structure includes: + - handler/ directory with source files and Azure wrappers + - function.json with trigger and binding configuration + - host.json with runtime configuration + - requirements.txt or package.json with dependencies + + Args: + directory: Directory containing the function code + language_name: Programming language (python, nodejs) + language_version: Language runtime version + architecture: Target architecture (currently unused) + benchmark: Name of the benchmark + is_cached: Whether the package is from cache + container_deployment: Whether to use container deployment + + Returns: + Tuple of (directory_path, code_size_bytes, container_uri) + + Raises: + NotImplementedError: If container deployment is requested. + """ container_uri = "" @@ -187,6 +294,23 @@ def publish_function( container_dest: str, repeat_on_failure: bool = False, ) -> str: + """Publish function code to Azure Functions. + + Deploys the packaged function code to Azure Functions using the + Azure Functions CLI tools. Handles retries and URL extraction. + + Args: + function: Function instance to publish + code_package: Benchmark code package to deploy + container_dest: Destination path in the CLI container + repeat_on_failure: Whether to retry on failure + + Returns: + URL for invoking the published function. + + Raises: + RuntimeError: If function publication fails or URL cannot be found. + """ success = False url = "" self.logging.info("Attempting publish of function {}".format(function.name)) @@ -245,24 +369,27 @@ def publish_function( raise e return url - """ - Publish function code on Azure. - Boolean flag enables repeating publish operation until it succeeds. - Useful for publish immediately after function creation where it might - take from 30-60 seconds for all Azure caches to be updated. - - :param name: function name - :param repeat_on_failure: keep repeating if command fails on unknown name. - :return: URL to reach HTTP-triggered function - """ - def update_function( self, function: Function, code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update existing Azure Function with new code. + + Updates an existing Azure Function with new code package, + including environment variables and function configuration. + + Args: + function: Function instance to update + code_package: New benchmark code package + container_deployment: Whether using container deployment + container_uri: Container URI (unused for Azure) + + Raises: + NotImplementedError: If container deployment is requested. 
+ """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -293,7 +420,21 @@ def update_function( trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) - def update_envs(self, function: Function, code_package: Benchmark, env_variables: dict = {}): + def update_envs(self, function: Function, code_package: Benchmark, env_variables: dict = {}) -> None: + """Update environment variables for Azure Function. + + Sets up environment variables required for benchmark execution, + including storage connection strings and NoSQL database credentials. + Preserves existing environment variables while adding new ones. + + Args: + function: Function instance to update + code_package: Benchmark code package with requirements + env_variables: Additional environment variables to set + + Raises: + RuntimeError: If environment variable operations fail. + """ envs = {} if code_package.uses_nosql: @@ -377,13 +518,33 @@ def update_envs(self, function: Function, code_package: Benchmark, env_variables self.logging.error(e) raise e - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """Update Azure Function configuration. + + Currently not implemented for Azure Functions as memory and timeout + configuration is handled at the consumption plan level. + + Args: + function: Function instance to configure + code_package: Benchmark code package with requirements + """ # FIXME: this does nothing currently - we don't specify timeout self.logging.warning( "Updating function's memory and timeout configuration is not supported." ) def _mount_function_code(self, code_package: Benchmark) -> str: + """Mount function code package in Azure CLI container. + + Uploads the function code package to a temporary location in the + Azure CLI container for deployment operations. + + Args: + code_package: Benchmark code package to mount + + Returns: + Path to mounted code in the CLI container. + """ dest = os.path.join("/mnt", "function", uuid.uuid4().hex) self.cli_instance.upload_package(code_package.code_location, dest) return dest @@ -391,8 +552,18 @@ def _mount_function_code(self, code_package: Benchmark) -> str: def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: - """ - Functionapp names must be globally unique in Azure. + """Generate default function name for Azure. + + Creates a globally unique function name based on resource ID, + benchmark name, language, and version. Function app names must + be globally unique across all of Azure. + + Args: + code_package: Benchmark code package + resources: Optional resources (unused) + + Returns: + Globally unique function name for Azure. """ func_name = ( "sebs-{}-{}-{}-{}".format( @@ -413,6 +584,25 @@ def create_function( container_deployment: bool, container_uri: str, ) -> AzureFunction: + """Create new Azure Function. + + Creates a new Azure Function App and deploys the provided code package. + Handles function app creation, storage account allocation, and initial + deployment with proper configuration. + + Args: + code_package: Benchmark code package to deploy + func_name: Name for the Azure Function App + container_deployment: Whether to use container deployment + container_uri: Container URI (unused for Azure) + + Returns: + AzureFunction instance representing the created function. 
+ + Raises: + NotImplementedError: If container deployment is requested. + RuntimeError: If function creation fails. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -496,8 +686,15 @@ def create_function( ) return function - def cached_function(self, function: Function): - + def cached_function(self, function: Function) -> None: + """Initialize cached function with current configuration. + + Sets up a cached function with current data storage account + and logging handlers for all triggers. + + Args: + function: Function instance loaded from cache + """ data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -511,7 +708,20 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: Dict[str, dict], - ): + ) -> None: + """Download execution metrics from Azure Application Insights. + + Retrieves performance metrics for function executions from Azure + Application Insights and updates the execution results with + provider-specific timing information. + + Args: + function_name: Name of the Azure Function + start_time: Start timestamp for metrics collection + end_time: End timestamp for metrics collection + requests: Dictionary of execution results to update + metrics: Additional metrics dictionary (unused) + """ self.cli_instance.install_insights() @@ -584,14 +794,30 @@ def download_metrics( # TODO: query performance counters for mem - def _enforce_cold_start(self, function: Function, code_package: Benchmark): - + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: + """Enforce cold start for a single function. + + Updates environment variable to force cold start behavior. + + Args: + function: Function instance to update + code_package: Benchmark code package + """ self.update_envs(function, code_package, {"ForceColdStart": str(self.cold_start_counter)}) # FIXME: is this sufficient to enforce cold starts? # self.update_function(function, code_package, False, "") - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for multiple functions. + + Forces cold start behavior for all provided functions by updating + environment variables and waiting for changes to propagate. + + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark code package + """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -599,10 +825,17 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) - """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. - """ - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create trigger for Azure Function. + + Currently not implemented as HTTP triggers are automatically + created for each function during deployment. + + Args: + function: Function to create trigger for + trigger_type: Type of trigger to create + + Raises: + NotImplementedError: Trigger creation is not supported. 
+ """ raise NotImplementedError() diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index 079e72d3..70842ff1 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -1,3 +1,30 @@ +"""Azure Blob Storage implementation for SeBS benchmarking. + +This module provides Azure Blob Storage integration for the SeBS benchmarking +suite. It handles container management, file uploads/downloads, and storage +operations required for serverless function benchmarking. + +The BlobStorage class implements the PersistentStorage interface to provide +Azure-specific storage operations including container creation, file management, +and cleanup operations. + +Example: + Basic usage for Azure Blob Storage: + + ```python + from sebs.azure.blob_storage import BlobStorage + + # Initialize with connection string + storage = BlobStorage(region, cache, resources, connection_string, False) + + # Upload benchmark data + storage.upload(container_name, filepath, key) + + # Download results + storage.download(container_name, key, local_filepath) + ``` +""" + import os import uuid from typing import List, Optional @@ -10,12 +37,33 @@ class BlobStorage(PersistentStorage): + """Azure Blob Storage implementation for benchmark data management. + + This class provides Azure Blob Storage operations for storing and retrieving + benchmark input data, function outputs, and temporary files. It manages + containers (equivalent to S3 buckets) and handles file operations with + proper error handling and logging. + + Attributes: + client: Azure Blob Service client for storage operations + """ + @staticmethod def typename() -> str: + """Get the storage type name. + + Returns: + Storage type identifier for Azure Blob Storage. + """ return "Azure.BlobStorage" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + Platform name 'azure'. + """ return "azure" def __init__( @@ -25,17 +73,35 @@ def __init__( resources: Resources, conn_string: str, replace_existing: bool, - ): + ) -> None: + """Initialize Azure Blob Storage. + + Args: + region: Azure region for storage operations + cache_client: Cache for storing storage configuration + resources: Resources configuration + conn_string: Azure Storage connection string + replace_existing: Whether to replace existing files + """ super().__init__(region, cache_client, resources, replace_existing) self.client: BlobServiceClient = BlobServiceClient.from_connection_string(conn_string) - """ - Internal implementation of creating a new container. - """ - def _create_bucket( self, name: str, containers: List[str] = [], randomize_name: bool = False ) -> str: + """Create new Azure Blob Storage container. + + Internal implementation for creating containers with optional + name randomization and existence checking. + + Args: + name: Base name for the container + containers: List of existing containers to check + randomize_name: Whether to append random suffix to name + + Returns: + Name of the created or existing container. + """ for c in containers: if name in c: self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) @@ -47,14 +113,31 @@ def _create_bucket( self.logging.info("Created container {}".format(name)) return name - """ - Azure does not allow dots in container names. - """ - def correct_name(self, name: str) -> str: + """Correct container name for Azure requirements. + + Azure Blob Storage does not allow dots in container names, + so they are replaced with hyphens. 
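+        For example, a name such as ``sebs.benchmarks.input`` becomes
+        ``sebs-benchmarks-input``.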
+ + Args: + name: Original container name + + Returns: + Corrected container name with dots replaced by hyphens. + """ return name.replace(".", "-") def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List Azure Blob Storage containers. + + Lists all containers or those matching a prefix. + + Args: + bucket_name: Optional prefix to filter container names + + Returns: + List of container names. + """ if bucket_name is not None: return [ container["name"] @@ -63,7 +146,17 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: else: return [container["name"] for container in self.client.list_containers()] - def uploader_func(self, container_idx, file, filepath): + def uploader_func(self, container_idx: int, file: str, filepath: str) -> None: + """Upload file to Azure Blob Storage container. + + Uploads a file to the specified container with proper path handling + and duplicate checking. + + Args: + container_idx: Index of the container for file organization + file: Name of the file being uploaded + filepath: Local path to the file to upload + """ # Skip upload when using cached containers if self.cached and not self.replace_existing: return @@ -82,37 +175,60 @@ def uploader_func(self, container_idx, file, filepath): client.upload_blob(data=file_data, overwrite=True) self.logging.info("Upload {} to {}".format(filepath, container_name)) - """ - Download file from bucket. - - :param container_name: - :param file: - :param filepath: - """ - - def download(self, container_name: str, key: str, filepath: str): + def download(self, container_name: str, key: str, filepath: str) -> None: + """Download file from Azure Blob Storage. + + Downloads a blob from the specified container to a local file. + + Args: + container_name: Name of the Azure Blob Storage container + key: Blob key/name in the container + filepath: Local file path to save the downloaded content + """ self.logging.info("Download {}:{} to {}".format(container_name, key, filepath)) client = self.client.get_blob_client(container_name, key) with open(filepath, "wb") as download_file: download_file.write(client.download_blob().readall()) - def upload(self, container_name: str, filepath: str, key: str): + def upload(self, container_name: str, filepath: str, key: str) -> None: + """Upload file to Azure Blob Storage. + + Uploads a local file to the specified container with the given key. + + Args: + container_name: Name of the Azure Blob Storage container + filepath: Local file path to upload + key: Blob key/name in the container + """ self.logging.info("Upload {} to {}".format(filepath, container_name)) client = self.client.get_blob_client(container_name, key) with open(filepath, "rb") as upload_file: client.upload_blob(upload_file) # type: ignore def exists_bucket(self, container: str) -> bool: + """Check if Azure Blob Storage container exists. + + Args: + container: Name of the container to check + + Returns: + True if container exists, False otherwise. + """ return self.client.get_container_client(container).exists() - """ - Return list of files in a container. - - :param container: - :return: list of file names. empty if container empty - """ - - def list_bucket(self, container: str, prefix: str = ""): + def list_bucket(self, container: str, prefix: str = "") -> List[str]: + """List files in Azure Blob Storage container. + + Returns list of blob names in the specified container, + optionally filtered by prefix. 
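+        Note that the filter is applied as a substring match on the blob name
+        rather than a strict prefix match.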
+ + Args: + container: Name of the container to list + prefix: Optional prefix to filter blob names + + Returns: + List of blob names. Empty list if container is empty. + """ objects = list( map( lambda x: x["name"], @@ -121,12 +237,27 @@ def list_bucket(self, container: str, prefix: str = ""): ) return [x for x in objects if prefix in x] - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Clean all blobs from Azure Blob Storage container. + + Removes all blobs from the specified container but keeps + the container itself. + + Args: + bucket: Name of the container to clean + """ self.logging.info("Clean output container {}".format(bucket)) container_client = self.client.get_container_client(bucket) blobs = list(map(lambda x: x["name"], container_client.list_blobs())) if len(blobs) > 0: container_client.delete_blobs(*blobs) - def remove_bucket(self, bucket: str): + def remove_bucket(self, bucket: str) -> None: + """Remove Azure Blob Storage container. + + Deletes the entire container and all its contents. + + Args: + bucket: Name of the container to remove + """ self.client.get_container_client(bucket).delete_container() diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index b875ee02..caaa3102 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -1,7 +1,38 @@ +"""Azure CLI Docker container management for SeBS benchmarking. + +This module provides a wrapper around the Azure CLI running in a Docker container. +It handles container lifecycle, command execution, file uploads, and Azure-specific +operations required for serverless function deployment and management. + +The AzureCLI class manages a Docker container with Azure CLI tools and provides +methods for executing Azure commands, uploading function packages, and handling +authentication. + +Example: + Basic usage for Azure CLI operations: + + ```python + from sebs.azure.cli import AzureCLI + + # Initialize CLI container + cli = AzureCLI(system_config, docker_client) + + # Login to Azure + cli.login(app_id, tenant, password) + + # Execute Azure CLI commands + result = cli.execute("az group list") + + # Upload function package + cli.upload_package(local_dir, container_dest) + ``` +""" + import io import logging import os import tarfile +from typing import Optional import docker @@ -10,8 +41,30 @@ class AzureCLI(LoggingBase): - def __init__(self, system_config: SeBSConfig, docker_client: docker.client): - + """Azure CLI Docker container wrapper. + + This class manages a Docker container running Azure CLI tools and provides + methods for executing Azure commands, handling authentication, and managing + file transfers for serverless function deployment. + + Attributes: + docker_instance: Docker container running Azure CLI + _insights_installed: Flag indicating if Application Insights extension is installed + """ + + def __init__(self, system_config: SeBSConfig, docker_client: docker.client) -> None: + """Initialize Azure CLI container. + + Creates and starts a Docker container with Azure CLI tools installed. + Handles image pulling if not available locally. + + Args: + system_config: SeBS system configuration + docker_client: Docker client for container operations + + Raises: + RuntimeError: If Docker image pull fails. 
+ """ super().__init__() repo_name = system_config.docker_repository() @@ -40,7 +93,7 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): detach=True, tty=True, ) - self._insights_installed = False + self._insights_installed: bool = False self.logging.info(f"Started Azure CLI container: {self.docker_instance.id}.") while True: try: @@ -52,14 +105,28 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): @staticmethod def typename() -> str: + """Get the CLI type name. + + Returns: + Type identifier for Azure CLI. + """ return "Azure.CLI" - """ - Execute the given command in Azure CLI. - Throws an exception on failure (commands are expected to execute succesfully). - """ - - def execute(self, cmd: str): + def execute(self, cmd: str) -> bytes: + """Execute Azure CLI command in Docker container. + + Executes the given command in the Azure CLI container and returns + the output. Raises an exception if the command fails. + + Args: + cmd: Azure CLI command to execute + + Returns: + Command output as bytes. + + Raises: + RuntimeError: If command execution fails. + """ exit_code, out = self.docker_instance.exec_run(cmd, user="docker_user") if exit_code != 0: raise RuntimeError( @@ -69,11 +136,20 @@ def execute(self, cmd: str): ) return out - """ - Run azure login command on Docker instance. - """ - def login(self, appId: str, tenant: str, password: str) -> bytes: + """Login to Azure using service principal credentials. + + Authenticates with Azure using service principal credentials + within the Docker container. + + Args: + appId: Azure application (client) ID + tenant: Azure tenant (directory) ID + password: Azure client secret + + Returns: + Login command output as bytes. + """ result = self.execute( "az login -u {0} --service-principal --tenant {1} -p {2}".format( appId, @@ -84,38 +160,44 @@ def login(self, appId: str, tenant: str, password: str) -> bytes: self.logging.info("Azure login succesful") return result - def upload_package(self, directory: str, dest: str): - - """ - This is not an efficient and memory-intensive implementation. - So far, we didn't have very large functions that require many gigabytes. - - Since docker-py does not support a straightforward copy, and we can't - put_archive in chunks. - - If we end up having problems because of the archive size, there are two - potential solutions: - (1) manually call docker cp and decompress - (2) commit the docker container and restart with a new mount volume. + def upload_package(self, directory: str, dest: str) -> None: + """Upload function package to Docker container. + + Creates a compressed archive of the function package and uploads + it to the specified destination in the Docker container. + + Note: + This implementation loads the entire archive into memory, + which may not be efficient for very large function packages. + For large packages, consider using docker cp directly. + + Args: + directory: Local directory containing function package + dest: Destination path in the Docker container """ handle = io.BytesIO() with tarfile.open(fileobj=handle, mode="w:gz") as tar: for f in os.listdir(directory): tar.add(os.path.join(directory, f), arcname=f) - # shutil.make_archive(, 'zip', directory) # move to the beginning of memory before writing handle.seek(0) self.execute("mkdir -p {}".format(dest)) self.docker_instance.put_archive(path=dest, data=handle.read()) - def install_insights(self): + def install_insights(self) -> None: + """Install Azure Application Insights CLI extension. 
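The archiving step described in the `upload_package()` note can be reproduced in isolation. This sketch mirrors the in-memory tar-and-gzip approach visible in the diff:

```python
import io
import os
import tarfile


# Standalone sketch of the archiving step used by upload_package(): the whole
# directory is packed into an in-memory gzip tarball before being handed to
# Docker's put_archive(). Convenient for small packages, memory-heavy for
# large ones, as the docstring notes.
def pack_directory(directory: str) -> bytes:
    handle = io.BytesIO()
    with tarfile.open(fileobj=handle, mode="w:gz") as tar:
        for entry in os.listdir(directory):
            tar.add(os.path.join(directory, entry), arcname=entry)
    handle.seek(0)  # rewind before reading the archive back out
    return handle.read()
```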
+ + Installs the Application Insights extension for Azure CLI + if not already installed. Required for metrics collection. + """ if not self._insights_installed: self.execute("az extension add --name application-insights") + self._insights_installed = True - """ - Shutdowns Docker instance. - """ - - def shutdown(self): + def shutdown(self) -> None: + """Shutdown Azure CLI Docker container. + + Stops and removes the Docker container running Azure CLI tools. + """ self.logging.info("Stopping Azure manage Docker instance") self.docker_instance.stop() diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 9aef0d8c..32da2aff 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -1,3 +1,29 @@ +"""Configuration management for Azure serverless benchmarking. + +This module provides configuration classes for Azure resources, credentials, +and deployment settings. It handles Azure-specific configuration including +service principal authentication, resource group management, storage accounts, +and CosmosDB setup for the SeBS benchmarking suite. + +Key classes: + AzureCredentials: Manages Azure service principal authentication + AzureResources: Manages Azure resource allocation and lifecycle + AzureConfig: Combines credentials and resources for Azure deployment + +Example: + Basic usage for setting up Azure configuration: + + ```python + from sebs.azure.config import AzureConfig, AzureCredentials, AzureResources + from sebs.cache import Cache + + # Load configuration from config dict and cache + config = AzureConfig.deserialize(config_dict, cache, handlers) + credentials = config.credentials + resources = config.resources + ``` +""" + import json import logging import os @@ -5,7 +31,6 @@ import uuid from typing import cast, Dict, List, Optional - from sebs.azure.cli import AzureCLI from sebs.azure.cloud_resources import CosmosDBAccount from sebs.cache import Cache @@ -14,14 +39,35 @@ class AzureCredentials(Credentials): + """Azure service principal credentials for authentication. + + This class manages Azure service principal credentials required for + authenticating with Azure services. It handles app ID, tenant ID, + password, and subscription ID validation and caching. + + Attributes: + _appId: Azure application (client) ID + _tenant: Azure tenant (directory) ID + _password: Azure client secret + _subscription_id: Azure subscription ID (optional) + """ _appId: str _tenant: str _password: str + _subscription_id: Optional[str] def __init__( self, appId: str, tenant: str, password: str, subscription_id: Optional[str] = None - ): + ) -> None: + """Initialize Azure credentials. + + Args: + appId: Azure application (client) ID + tenant: Azure tenant (directory) ID + password: Azure client secret + subscription_id: Azure subscription ID (optional) + """ super().__init__() self._appId = appId self._tenant = tenant @@ -30,24 +76,54 @@ def __init__( @property def appId(self) -> str: + """Get the Azure application (client) ID. + + Returns: + Azure application ID string. + """ return self._appId @property def tenant(self) -> str: + """Get the Azure tenant (directory) ID. + + Returns: + Azure tenant ID string. + """ return self._tenant @property def password(self) -> str: + """Get the Azure client secret. + + Returns: + Azure client secret string. + """ return self._password @property def subscription_id(self) -> str: + """Get the Azure subscription ID. + + Returns: + Azure subscription ID string. + + Raises: + AssertionError: If subscription ID is not set. 
+ """ assert self._subscription_id is not None return self._subscription_id @subscription_id.setter - def subscription_id(self, subscription_id: str): - + def subscription_id(self, subscription_id: str) -> None: + """Set the Azure subscription ID with validation. + + Args: + subscription_id: Azure subscription ID to set + + Raises: + RuntimeError: If provided subscription ID conflicts with cached value. + """ if self._subscription_id is not None and subscription_id != self._subscription_id: self.logging.error( f"The subscription id {subscription_id} from provided " @@ -64,15 +140,44 @@ def subscription_id(self, subscription_id: str): @property def has_subscription_id(self) -> bool: + """Check if subscription ID is set. + + Returns: + True if subscription ID is set, False otherwise. + """ return self._subscription_id is not None @staticmethod def initialize(dct: dict, subscription_id: Optional[str]) -> "AzureCredentials": + """Initialize credentials from dictionary. + + Args: + dct: Dictionary containing credential information + subscription_id: Optional subscription ID to set + + Returns: + New AzureCredentials instance. + """ return AzureCredentials(dct["appId"], dct["tenant"], dct["password"], subscription_id) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: - + """Deserialize credentials from config and cache. + + Loads Azure credentials from either the configuration dictionary + or environment variables, with subscription ID retrieved from cache. + + Args: + config: Configuration dictionary + cache: Cache instance for storing/retrieving cached values + handlers: Logging handlers for error reporting + + Returns: + AzureCredentials instance with loaded configuration. + + Raises: + RuntimeError: If no valid credentials are found in config or environment. + """ cached_config = cache.get_config("azure") ret: AzureCredentials old_subscription_id: Optional[str] = None @@ -101,40 +206,104 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def serialize(self) -> dict: + """Serialize credentials to dictionary. + + Returns: + Dictionary containing serialized credential data. + """ out = {"subscription_id": self.subscription_id} return out - def update_cache(self, cache_client: Cache): + def update_cache(self, cache_client: Cache) -> None: + """Update credentials in cache. + + Args: + cache_client: Cache instance to update + """ cache_client.update_config(val=self.serialize(), keys=["azure", "credentials"]) class AzureResources(Resources): + """Azure resource management for SeBS benchmarking. + + This class manages Azure cloud resources including storage accounts, + resource groups, and CosmosDB accounts required for serverless function + benchmarking. It handles resource allocation, caching, and lifecycle management. + + Attributes: + _resource_group: Name of the Azure resource group + _storage_accounts: List of storage accounts for function code + _data_storage_account: Storage account for benchmark data + _cosmosdb_account: CosmosDB account for NoSQL storage + """ + class Storage: - def __init__(self, account_name: str, connection_string: str): + """Azure Storage Account wrapper. + + Represents an Azure Storage Account with connection details + for use in serverless function deployment and data storage. 
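A simplified sketch of the lookup order that the `deserialize()` docstring describes: explicit configuration first, environment variables as the fallback. The environment variable names below are assumptions used only for illustration:

```python
import os
from typing import Optional


# Sketch of the credential lookup order described by
# AzureCredentials.deserialize(): explicit config wins, environment
# variables are the fallback. The variable names are assumptions.
def load_service_principal(config: dict) -> Optional[dict]:
    if "credentials" in config and "appId" in config["credentials"]:
        return config["credentials"]
    env = os.environ
    required = ("AZURE_SECRET_APPLICATION_ID", "AZURE_SECRET_TENANT", "AZURE_SECRET_PASSWORD")
    if all(key in env for key in required):
        return {
            "appId": env[required[0]],
            "tenant": env[required[1]],
            "password": env[required[2]],
        }
    # No credentials found - the caller raises RuntimeError, as documented.
    return None
```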
+ + Attributes: + account_name: Name of the Azure storage account + connection_string: Connection string for accessing the storage account + """ + + def __init__(self, account_name: str, connection_string: str) -> None: + """Initialize Azure Storage account. + + Args: + account_name: Name of the Azure storage account + connection_string: Connection string for storage access + """ super().__init__() self.account_name = account_name self.connection_string = connection_string - # FIXME: 3.7+ migration with future annotations @staticmethod def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": + """Create Storage instance from cached data. + + Args: + account_name: Name of the storage account + connection_string: Connection string for the account + + Returns: + New Storage instance with the provided details. + + Raises: + AssertionError: If connection string is empty. + """ assert connection_string, "Empty connection string for account {}".format(account_name) return AzureResources.Storage(account_name, connection_string) @staticmethod def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Create Storage instance from newly allocated account. + + Args: + account_name: Name of the storage account + cli_instance: Azure CLI instance for querying connection string + + Returns: + New Storage instance with queried connection string. + """ connection_string = AzureResources.Storage.query_connection_string( account_name, cli_instance ) ret = AzureResources.Storage(account_name, connection_string) return ret - """ - Query the storage string in Azure using selected storage account. - """ - @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: + """Query connection string for storage account from Azure. + + Args: + account_name: Name of the storage account + cli_instance: Azure CLI instance for executing queries + + Returns: + Connection string for the storage account. + """ ret = cli_instance.execute( "az storage account show-connection-string --name {}".format(account_name) ) @@ -143,41 +312,75 @@ def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: return connection_string def serialize(self) -> dict: + """Serialize storage account to dictionary. + + Returns: + Dictionary containing storage account information. + """ return vars(self) @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": + """Deserialize storage account from dictionary. + + Args: + obj: Dictionary containing storage account data + + Returns: + New Storage instance from dictionary data. + """ return AzureResources.Storage.from_cache(obj["account_name"], obj["connection_string"]) - # FIXME: 3.7 Python, future annotations def __init__( self, resource_group: Optional[str] = None, - storage_accounts: List["AzureResources.Storage"] = [], + storage_accounts: Optional[List["AzureResources.Storage"]] = None, data_storage_account: Optional["AzureResources.Storage"] = None, cosmosdb_account: Optional[CosmosDBAccount] = None, - ): + ) -> None: + """Initialize Azure resources. 
+ + Args: + resource_group: Name of Azure resource group + storage_accounts: List of storage accounts for function code + data_storage_account: Storage account for benchmark data + cosmosdb_account: CosmosDB account for NoSQL operations + """ super().__init__(name="azure") self._resource_group = resource_group - self._storage_accounts = storage_accounts + self._storage_accounts = storage_accounts or [] self._data_storage_account = data_storage_account self._cosmosdb_account = cosmosdb_account - def set_region(self, region: str): + def set_region(self, region: str) -> None: + """Set the Azure region for resource allocation. + + Args: + region: Azure region name (e.g., 'westus2') + """ self._region = region @property def storage_accounts(self) -> List["AzureResources.Storage"]: + """Get list of storage accounts for function code. + + Returns: + List of Storage instances for function deployment. + """ return self._storage_accounts - """ - Locate resource group name in config. - If not found, then create a new resource group with uuid-based name. - - Requires Azure CLI instance in Docker. - """ - def resource_group(self, cli_instance: AzureCLI) -> str: + """Get or create Azure resource group. + + Locates existing resource group or creates a new one with UUID-based name. + The resource group is used to contain all SeBS-related Azure resources. + + Args: + cli_instance: Azure CLI instance for resource operations + + Returns: + Name of the resource group. + """ # Create resource group if not known if not self._resource_group: # Only underscore and alphanumeric characters are allowed @@ -199,7 +402,19 @@ def resource_group(self, cli_instance: AzureCLI) -> str: return self._resource_group def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: - + """List SeBS resource groups in the current region. + + Queries Azure for existing resource groups that match the SeBS naming pattern. + + Args: + cli_instance: Azure CLI instance for executing queries + + Returns: + List of resource group names matching SeBS pattern. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( "az group list --query " "\"[?starts_with(name,'sebs_resource_group_') && location=='{0}']\"".format( @@ -214,8 +429,19 @@ def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True): - + def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True) -> None: + """Delete Azure resource group. + + Removes the specified resource group and all contained resources. + + Args: + cli_instance: Azure CLI instance for executing deletion + name: Name of resource group to delete + wait: Whether to wait for deletion to complete + + Raises: + RuntimeError: If resource group deletion fails. + """ cmd = "az group delete -y --name {0}".format(name) if not wait: cmd += " --no-wait" @@ -225,15 +451,21 @@ def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = self.logging.error(ret.decode()) raise RuntimeError("Failed to delete the resource group!") - """ - Find or create a serverless CosmosDB account. - If not found, then create a new one based on the current resource ID. - Restriction: account names must be globally unique. - - Requires Azure CLI instance in Docker. 
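A small sketch of the resource group naming scheme described above; the prefix matches the `sebs_resource_group_` pattern queried by `list_resource_groups()`, while the exact length of the UUID fragment is an assumption for this example:

```python
import uuid


# Sketch of the resource group naming scheme: UUID-based name with the
# 'sebs_resource_group_' prefix, using only alphanumeric characters and
# underscores. The fragment length is an assumption.
def new_resource_group_name() -> str:
    return "sebs_resource_group_{}".format(str(uuid.uuid1())[0:8])
```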
- """ - def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: + """Get or create CosmosDB account for NoSQL storage. + + Finds existing CosmosDB account or creates a new serverless one. + Account names must be globally unique across Azure. + + Args: + cli_instance: Azure CLI instance for CosmosDB operations + + Returns: + CosmosDBAccount instance for NoSQL operations. + + Raises: + RuntimeError: If CosmosDB account creation or parsing fails. + """ # Create resource group if not known if not self._cosmosdb_account: @@ -274,7 +506,19 @@ def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: return self._cosmosdb_account def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: - + """List existing CosmosDB accounts in resource group. + + Queries for CosmosDB accounts matching the SeBS naming pattern. + + Args: + cli_instance: Azure CLI instance for executing queries + + Returns: + Dictionary mapping account names to document endpoints. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( f" az cosmosdb list --resource-group {self._resource_group} " " --query \"[?starts_with(name,'sebs-cosmosdb-account')]\" " @@ -287,13 +531,18 @@ def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Retrieve or create storage account associated with benchmark data. - Last argument allows to override the resource - useful when handling - a single instance through multiple threads using different clients sharing the same cache. - """ - def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Get or create storage account for benchmark data. + + Retrieves existing or creates new storage account dedicated to storing + benchmark input/output data. This is separate from function code storage. + + Args: + cli_instance: Azure CLI instance for storage operations + + Returns: + Storage instance for benchmark data operations. + """ if not self._data_storage_account: # remove non-numerical and non-alphabetic characters @@ -304,7 +553,19 @@ def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storag return self._data_storage_account def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: - + """List storage accounts in the resource group. + + Queries for all storage accounts within the managed resource group. + + Args: + cli_instance: Azure CLI instance for executing queries + + Returns: + List of storage account names. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( ("az storage account list --resource-group {0}").format( self.resource_group(cli_instance) @@ -318,11 +579,18 @@ def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Create a new function storage account and add to the list. - """ - def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Create new storage account for function code. + + Creates a new storage account with a UUID-based name for storing + function code packages and adds it to the managed accounts list. + + Args: + cli_instance: Azure CLI instance for storage operations + + Returns: + New Storage instance for function code storage. + """ # Create account. 
Only alphanumeric characters are allowed # This one is used to store functions code - hence the name. @@ -333,15 +601,21 @@ def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage self._storage_accounts.append(account) return account - """ - Internal implementation of creating a new storage account. - The method does NOT update cache and - does NOT add the account to any resource collection. - """ - def _create_storage_account( self, cli_instance: AzureCLI, account_name: str ) -> "AzureResources.Storage": + """Internal method to create storage account. + + Creates a new Azure storage account with the specified name. + This method does NOT update cache or add to resource collections. + + Args: + cli_instance: Azure CLI instance for storage operations + account_name: Name for the new storage account + + Returns: + New Storage instance for the created account. + """ sku = "Standard_LRS" self.logging.info("Starting allocation of storage account {}.".format(account_name)) cli_instance.execute( @@ -358,20 +632,28 @@ def _create_storage_account( self.logging.info("Storage account {} created.".format(account_name)) return AzureResources.Storage.from_allocation(account_name, cli_instance) - """ - Update the contents of the user cache. - The changes are directly written to the file system. - - Update values: storage accounts, data storage accounts, resource groups. - """ - - def update_cache(self, cache_client: Cache): + def update_cache(self, cache_client: Cache) -> None: + """Update resource configuration in cache. + + Persists current resource state including storage accounts, + data storage accounts, and resource groups to filesystem cache. + + Args: + cache_client: Cache instance for storing configuration + """ super().update_cache(cache_client) cache_client.update_config(val=self.serialize(), keys=["azure", "resources"]) @staticmethod - def initialize(res: Resources, dct: dict): - + def initialize(res: Resources, dct: dict) -> None: + """Initialize resources from dictionary data. + + Populates resource instance with data from configuration dictionary. + + Args: + res: Resources instance to initialize + dct: Dictionary containing resource configuration + """ ret = cast(AzureResources, res) super(AzureResources, AzureResources).initialize(ret, dct) @@ -392,6 +674,11 @@ def initialize(res: Resources, dct: dict): ret._cosmosdb_account = CosmosDBAccount.deserialize(dct["cosmosdb_account"]) def serialize(self) -> dict: + """Serialize resources to dictionary. + + Returns: + Dictionary containing all resource configuration data. + """ out = super().serialize() if len(self._storage_accounts) > 0: out["storage_accounts"] = [x.serialize() for x in self._storage_accounts] @@ -405,7 +692,18 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: - + """Deserialize resources from config and cache. + + Loads Azure resources from cache if available, otherwise from configuration. + + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached values + handlers: Logging handlers for error reporting + + Returns: + AzureResources instance with loaded configuration. 
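Taken together, the `serialize()` and `update_cache()` methods in this file persist their state under nested keys of the per-cloud cache entry. An approximate sketch of the resulting structure; the keys follow the `update_config(..., keys=[...])` calls visible in this patch, the values are placeholders, and fields whose serialization is elided in the diff are omitted:

```python
# Approximate shape of the cached "azure" entry after update_cache() has run
# for region, credentials, and resources. Values are placeholders.
cached_azure_entry = {
    "region": "westeurope",
    "credentials": {"subscription_id": "00000000-0000-0000-0000-000000000000"},
    "resources": {
        "storage_accounts": [
            {"account_name": "sebsstorage123", "connection_string": "<connection-string>"}
        ],
    },
}
```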
+ """ cached_config = cache.get_config("azure") ret = AzureResources() # Load cached values @@ -426,30 +724,72 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AzureConfig(Config): - def __init__(self, credentials: AzureCredentials, resources: AzureResources): + """Complete Azure configuration for SeBS benchmarking. + + Combines Azure credentials and resources into a single configuration + object for managing Azure serverless function deployments. + + Attributes: + _credentials: Azure service principal credentials + _resources: Azure resource management instance + """ + + def __init__(self, credentials: AzureCredentials, resources: AzureResources) -> None: + """Initialize Azure configuration. + + Args: + credentials: Azure service principal credentials + resources: Azure resource management instance + """ super().__init__(name="azure") self._credentials = credentials self._resources = resources @property def credentials(self) -> AzureCredentials: + """Get Azure credentials. + + Returns: + AzureCredentials instance for authentication. + """ return self._credentials @property def resources(self) -> AzureResources: + """Get Azure resources manager. + + Returns: + AzureResources instance for resource management. + """ return self._resources - # FIXME: use future annotations (see sebs/faas/system) @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize configuration from dictionary data. + + Args: + cfg: Config instance to initialize + dct: Dictionary containing configuration data + """ config = cast(AzureConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: - + """Deserialize complete Azure configuration. + + Creates AzureConfig instance from configuration dictionary and cache, + combining credentials and resources with region information. + + Args: + config: Configuration dictionary + cache: Cache instance for storing/retrieving cached values + handlers: Logging handlers for error reporting + + Returns: + AzureConfig instance with complete Azure configuration. + """ cached_config = cache.get_config("azure") - # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) resources = cast(AzureResources, AzureResources.deserialize(config, cache, handlers)) config_obj = AzureConfig(credentials, resources) @@ -465,19 +805,24 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.set_region(config_obj.region) return config_obj - """ - Update the contents of the user cache. - The changes are directly written to the file system. - - Update values: region. - """ - - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update complete configuration in cache. + + Persists region, credentials, and resources to filesystem cache. + + Args: + cache: Cache instance for storing configuration + """ cache.update_config(val=self.region, keys=["azure", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """Serialize complete configuration to dictionary. + + Returns: + Dictionary containing all Azure configuration data. 
+ """ out = { "name": "azure", "region": self._region, diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 52f8086b..37c2daaa 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -1,3 +1,31 @@ +"""Azure CosmosDB integration for SeBS NoSQL benchmarking. + +This module provides Azure CosmosDB integration for NoSQL benchmarks in the +SeBS benchmarking suite. It handles database and container management, +data operations, and resource lifecycle for NoSQL-based benchmarks. + +The module includes: + - BenchmarkResources: Dataclass for managing benchmark-specific resources + - CosmosDB: Main class for CosmosDB operations and management + +Example: + Basic usage for CosmosDB operations: + + ```python + from sebs.azure.cosmosdb import CosmosDB + + # Initialize CosmosDB with account + cosmosdb = CosmosDB(cache, resources, cosmosdb_account) + + # Set up benchmark database and containers + db_name = cosmosdb.benchmark_database("my-benchmark") + tables = cosmosdb.get_tables("my-benchmark") + + # Perform operations + credentials = cosmosdb.credentials() + ``` +""" + from dataclasses import dataclass from typing import cast, Dict, List, Optional, Tuple @@ -13,6 +41,16 @@ @dataclass class BenchmarkResources: + """Resource container for benchmark-specific CosmosDB resources. + + This dataclass holds the database and container names allocated + for a specific benchmark, along with the database client proxy. + + Attributes: + database: Name of the CosmosDB database + containers: List of container names for the benchmark + database_client: CosmosDB database proxy (allocated dynamically) + """ database: str containers: List[str] @@ -20,10 +58,23 @@ class BenchmarkResources: database_client: Optional[DatabaseProxy] = None def serialize(self) -> dict: + """Serialize benchmark resources to dictionary. + + Returns: + Dictionary containing database and container names. + """ return {"database": self.database, "containers": self.containers} @staticmethod def deserialize(config: dict) -> "BenchmarkResources": + """Deserialize benchmark resources from dictionary. + + Args: + config: Dictionary containing resource configuration + + Returns: + BenchmarkResources instance with restored configuration. + """ return BenchmarkResources(database=config["database"], containers=config["containers"]) diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c57..0a7eb740 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,8 +1,46 @@ +"""Azure Function implementation for SeBS benchmarking. + +This module provides the Azure-specific implementation of serverless functions +for the SeBS benchmarking suite. It handles Azure Function representation, +serialization, and deserialization with Azure-specific storage configuration. + +The AzureFunction class extends the base Function class to include Azure-specific +attributes like function storage accounts and Azure trigger configurations. 
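The `BenchmarkResources` dataclass above persists only the database name and container list; the database client proxy is re-created at runtime. A minimal round-trip check, assuming the SeBS Azure dependencies are installed:

```python
from sebs.azure.cosmosdb import BenchmarkResources

# Round trip: deserialize a cached entry and serialize it back.
resources = BenchmarkResources.deserialize({"database": "benchmark-db", "containers": ["table-0"]})
assert resources.serialize() == {"database": "benchmark-db", "containers": ["table-0"]}
```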
+ +Example: + Basic usage for creating an Azure Function: + + ```python + from sebs.azure.function import AzureFunction + from sebs.azure.config import AzureResources + from sebs.faas.function import FunctionConfig + + # Create function with Azure-specific storage + function = AzureFunction( + name="my-function", + benchmark="test-benchmark", + code_hash="abc123", + function_storage=storage_account, + cfg=function_config + ) + ``` +""" + from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig class AzureFunction(Function): + """Azure Function implementation for SeBS benchmarking. + + This class represents an Azure Function with Azure-specific attributes + and configuration. It includes storage account information and supports + Azure-specific triggers like HTTP triggers. + + Attributes: + function_storage: Azure Storage account used for function code storage + """ + def __init__( self, name: str, @@ -10,11 +48,25 @@ def __init__( code_hash: str, function_storage: AzureResources.Storage, cfg: FunctionConfig, - ): + ) -> None: + """Initialize Azure Function. + + Args: + name: Name of the Azure Function + benchmark: Name of the benchmark this function implements + code_hash: Hash of the function code for caching + function_storage: Azure Storage account for function code + cfg: Function configuration with memory, timeout, etc. + """ super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage def serialize(self) -> dict: + """Serialize function to dictionary. + + Returns: + Dictionary containing function data including Azure-specific storage. + """ return { **super().serialize(), "function_storage": self.function_storage.serialize(), @@ -22,6 +74,20 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + """Deserialize function from cached configuration. + + Recreates an AzureFunction instance from cached data including + function configuration, storage account, and triggers. + + Args: + cached_config: Dictionary containing cached function data + + Returns: + AzureFunction instance with restored configuration. + + Raises: + AssertionError: If unknown trigger type is encountered. + """ cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 4296a588..93f2bb72 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,3 +1,30 @@ +"""Azure Function triggers for SeBS benchmarking. + +This module provides Azure-specific trigger implementations for invoking +serverless functions during benchmarking. It supports HTTP triggers and +integrates with Azure storage for data handling. + +The trigger classes handle function invocation, result processing, and +integration with Azure-specific services like Blob Storage. + +Example: + Basic usage for HTTP trigger: + + ```python + from sebs.azure.triggers import HTTPTrigger + + # Create HTTP trigger with function URL + trigger = HTTPTrigger(function_url, data_storage_account) + + # Synchronous invocation + result = trigger.sync_invoke(payload) + + # Asynchronous invocation + future = trigger.async_invoke(payload) + result = future.result() + ``` +""" + import concurrent.futures from typing import Any, Dict, Optional # noqa @@ -6,41 +33,121 @@ class AzureTrigger(Trigger): - def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None): + """Base class for Azure Function triggers. 
+ + This abstract base class provides common functionality for Azure Function + triggers, including data storage account management for benchmark data + handling. + + Attributes: + _data_storage_account: Azure storage account for benchmark data + """ + + def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None) -> None: + """Initialize Azure trigger. + + Args: + data_storage_account: Optional Azure storage account for data operations + """ super().__init__() self._data_storage_account = data_storage_account @property def data_storage_account(self) -> AzureResources.Storage: + """Get the data storage account. + + Returns: + Azure storage account for benchmark data. + + Raises: + AssertionError: If data storage account is not set. + """ assert self._data_storage_account return self._data_storage_account @data_storage_account.setter - def data_storage_account(self, data_storage_account: AzureResources.Storage): + def data_storage_account(self, data_storage_account: AzureResources.Storage) -> None: + """Set the data storage account. + + Args: + data_storage_account: Azure storage account to set + """ self._data_storage_account = data_storage_account class HTTPTrigger(AzureTrigger): - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + """HTTP trigger for Azure Functions. + + This class implements HTTP-based invocation of Azure Functions, supporting + both synchronous and asynchronous execution patterns for benchmarking. + + Attributes: + url: HTTP endpoint URL for the Azure Function + """ + + def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None) -> None: + """Initialize HTTP trigger. + + Args: + url: HTTP endpoint URL for the Azure Function + data_storage_account: Optional Azure storage account for data operations + """ super().__init__(data_storage_account) self.url = url @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + HTTP trigger type identifier. + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: - + """Synchronously invoke Azure Function via HTTP. + + Sends HTTP request to the function endpoint and waits for response. + + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult containing response data and timing information. + """ return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke Azure Function via HTTP. + + Submits function invocation to a thread pool for parallel execution. + + Args: + payload: Dictionary payload to send to the function + + Returns: + Future object that can be used to retrieve the result. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize trigger to dictionary. + + Returns: + Dictionary containing trigger type and URL. + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize trigger from dictionary. + + Args: + obj: Dictionary containing trigger data + + Returns: + HTTPTrigger instance with restored configuration. + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/cache.py b/sebs/cache.py index f690e747..185f0d9b 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -1,3 +1,20 @@ +"""Caching system for SeBS (Serverless Benchmarking Suite). 
+ +This module provides comprehensive caching functionality for the SeBS framework, +including configuration caching, code package management, function deployment +tracking, and storage resource management. + +The Cache class manages persistent storage of benchmark configurations, compiled +code packages, Docker containers, deployed functions, and cloud resource +configurations to optimize repeated benchmark executions and deployments. + +Example: + Basic cache usage: + cache = Cache("/path/to/cache", docker_client) + config = cache.get_benchmark_config("aws", "110.dynamic-html") + cache.add_code_package("aws", benchmark_instance) +""" + # https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth import collections.abc import docker @@ -15,7 +32,19 @@ from sebs.faas.function import Function -def update(d, u): +def update(d: Dict[str, Any], u: Dict[str, Any]) -> Dict[str, Any]: + """Recursively update nested dictionary with another dictionary. + + This function performs deep merge of two dictionaries, updating nested + dictionary values rather than replacing them entirely. + + Args: + d (Dict[str, Any]): The target dictionary to update. + u (Dict[str, Any]): The source dictionary with updates. + + Returns: + Dict[str, Any]: The updated dictionary. + """ for k, v in u.items(): if isinstance(v, collections.abc.Mapping): d[k] = update(d.get(k, {}), v) @@ -24,8 +53,18 @@ def update(d, u): return d -def update_dict(cfg, val, keys): - def map_keys(obj, val, keys): +def update_dict(cfg: Dict[str, Any], val: Any, keys: List[str]) -> None: + """Update dictionary value at nested key path. + + Updates a nested dictionary by setting a value at a path specified + by a list of keys. Creates intermediate dictionaries as needed. + + Args: + cfg (Dict[str, Any]): The dictionary to update. + val (Any): The value to set at the key path. + keys (List[str]): List of keys forming the path to the target location. + """ + def map_keys(obj: Dict[str, Any], val: Any, keys: List[str]) -> Dict[str, Any]: if len(keys): return {keys[0]: map_keys(obj, val, keys[1:])} else: @@ -35,14 +74,36 @@ def map_keys(obj, val, keys): class Cache(LoggingBase): - cached_config: Dict[str, str] = {} - """ - Indicate that cloud offerings updated credentials or settings. - Thus we have to write down changes. + """Persistent caching system for SeBS benchmark configurations and deployments. + + This class provides comprehensive caching functionality for SeBS benchmarks, + including configuration management, code package storage, function tracking, + and cloud resource management. It uses a file-based cache system with + thread-safe operations. + + Attributes: + cached_config (Dict[str, Any]): In-memory cache of cloud configurations. + config_updated (bool): Flag indicating if configuration needs to be saved. + cache_dir (str): Absolute path to the cache directory. + ignore_functions (bool): Flag to skip function caching operations. + ignore_storage (bool): Flag to skip storage resource caching. + docker_client (docker.DockerClient): Docker client for container operations. """ - config_updated = False - - def __init__(self, cache_dir: str, docker_client: docker.DockerClient): + + cached_config: Dict[str, Any] = {} + config_updated: bool = False + + def __init__(self, cache_dir: str, docker_client: docker.DockerClient) -> None: + """Initialize the Cache with directory and Docker client. + + Sets up the cache directory structure and loads existing configurations. 
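The two module-level helpers drive every nested cache update. A short demonstration of their behaviour, assuming the conventional implementation in which the innermost key receives the value (the tail of `update_dict()` is elided in this hunk):

```python
from sebs.cache import update, update_dict

cfg = {"azure": {"region": "westeurope"}}

# update(): nested dictionaries are merged rather than replaced wholesale.
update(cfg, {"azure": {"credentials": {"subscription_id": "abc"}}})
assert cfg["azure"]["region"] == "westeurope"
assert cfg["azure"]["credentials"]["subscription_id"] == "abc"

# update_dict(): set a value at a nested key path, creating levels as needed.
update_dict(cfg, val="rg-123", keys=["azure", "resources", "resource_group"])
assert cfg["azure"]["resources"]["resource_group"] == "rg-123"
```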
+ Creates the cache directory if it doesn't exist, otherwise loads + existing cached configurations. + + Args: + cache_dir (str): Path to the cache directory. + docker_client (docker.DockerClient): Docker client for container operations. + """ super().__init__() self.docker_client = docker_client self.cache_dir = os.path.abspath(cache_dir) @@ -56,36 +117,66 @@ def __init__(self, cache_dir: str, docker_client: docker.DockerClient): @staticmethod def typename() -> str: + """Get the typename for this cache. + + Returns: + str: The cache type name. + """ return "Benchmark" - def load_config(self): + def load_config(self) -> None: + """Load cached cloud configurations from disk. + + Reads configuration files for all supported cloud platforms from + the cache directory and loads them into memory. + """ with self._lock: for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) if os.path.exists(cloud_config_file): - self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) - - def get_config(self, cloud): + with open(cloud_config_file, "r") as f: + self.cached_config[cloud] = json.load(f) + + def get_config(self, cloud: str) -> Optional[Dict[str, Any]]: + """Get cached configuration for a specific cloud provider. + + Args: + cloud (str): Cloud provider name (e.g., 'aws', 'azure', 'gcp'). + + Returns: + Optional[Dict[str, Any]]: The cached configuration or None if not found. + """ return self.cached_config[cloud] if cloud in self.cached_config else None - """ - Update config values. Sets flag to save updated content in the end. - val: new value to store - keys: array of consecutive keys for multi-level dictionary - """ - - def update_config(self, val, keys): + def update_config(self, val: Any, keys: List[str]) -> None: + """Update configuration values at nested key path. + + Updates cached configuration by setting a value at the specified + nested key path. Sets the config_updated flag to ensure changes + are persisted to disk. + + Args: + val (Any): New value to store. + keys (List[str]): Array of consecutive keys for multi-level dictionary. + """ with self._lock: update_dict(self.cached_config, val, keys) self.config_updated = True - def lock(self): + def lock(self) -> None: + """Acquire the cache lock for thread-safe operations.""" self._lock.acquire() - def unlock(self): + def unlock(self) -> None: + """Release the cache lock.""" self._lock.release() - def shutdown(self): + def shutdown(self) -> None: + """Save cached configurations to disk if they were updated. + + Writes all updated cloud configurations back to their respective + JSON files in the cache directory. + """ if self.config_updated: for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: if cloud in self.cached_config: @@ -94,32 +185,24 @@ def shutdown(self): with open(cloud_config_file, "w") as out: json.dump(self.cached_config[cloud], out, indent=2) - """ - Access cached config of a benchmark. - - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: - - :return: a JSON config or None when not exists - """ - - def get_benchmark_config(self, deployment: str, benchmark: str): + def get_benchmark_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached configuration of a benchmark. + + Args: + deployment (str): Deployment platform ('aws', 'azure', 'gcp', 'openwhisk', 'local'). + benchmark (str): Benchmark name (e.g., '110.dynamic-html'). 
+ + Returns: + Optional[Dict[str, Any]]: Benchmark configuration or None if not found. + """ benchmark_dir = os.path.join(self.cache_dir, benchmark) if os.path.exists(benchmark_dir): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - cfg = json.load(fp) - return cfg[deployment] if deployment in cfg else None - - """ - Access cached version of benchmark code. - - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: - - :return: a tuple of JSON config and absolute path to code or None - """ + config_file = os.path.join(benchmark_dir, "config.json") + if os.path.exists(config_file): + with open(config_file, "r") as fp: + cfg = json.load(fp) + return cfg[deployment] if deployment in cfg else None + return None def get_code_package( self, @@ -129,6 +212,18 @@ def get_code_package( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: + """Access cached version of benchmark code package. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + language_version (str): Language version. + architecture (str): Target architecture. + + Returns: + Optional[Dict[str, Any]]: Code package configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) key = f"{language_version}-{architecture}" @@ -145,6 +240,18 @@ def get_container( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: + """Access cached container configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + language_version (str): Language version. + architecture (str): Target architecture. + + Returns: + Optional[Dict[str, Any]]: Container configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) key = f"{language_version}-{architecture}" @@ -156,57 +263,108 @@ def get_container( def get_functions( self, deployment: str, benchmark: str, language: str ) -> Optional[Dict[str, Any]]: + """Get cached function configurations for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + + Returns: + Optional[Dict[str, Any]]: Function configurations or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) if cfg and language in cfg and not self.ignore_functions: return cfg[language]["functions"] else: return None - """ - Access cached storage config of a benchmark. - - :param deployment: allowed deployment clouds or local - :param benchmark: - - :return: a JSON config or None - """ - - def get_storage_config(self, deployment: str, benchmark: str): + def get_storage_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached storage configuration of a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + + Returns: + Optional[Dict[str, Any]]: Storage configuration or None if not found. + """ return self._get_resource_config(deployment, benchmark, "storage") - def get_nosql_config(self, deployment: str, benchmark: str): + def get_nosql_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached NoSQL configuration of a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. 
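Code packages are cached per `<language_version>-<architecture>` key, as the `get_code_package()` implementation above shows. A sketch of the lookup; the nesting of `config.json` around that key is an assumption for this illustration:

```python
# Sketch of the code-package lookup: entries are keyed by
# "<language_version>-<architecture>". The "code_package" nesting is an
# assumption, not a verbatim copy of the cache layout.
def lookup_code_package(benchmark_cfg: dict, language: str, version: str, architecture: str):
    key = f"{version}-{architecture}"
    return benchmark_cfg.get(language, {}).get("code_package", {}).get(key)
```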
+ + Returns: + Optional[Dict[str, Any]]: NoSQL configuration or None if not found. + """ return self._get_resource_config(deployment, benchmark, "nosql") - def _get_resource_config(self, deployment: str, benchmark: str, resource: str): + def _get_resource_config(self, deployment: str, benchmark: str, resource: str) -> Optional[Dict[str, Any]]: + """Get cached resource configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + resource (str): Resource type ('storage' or 'nosql'). + + Returns: + Optional[Dict[str, Any]]: Resource configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) return cfg[resource] if cfg and resource in cfg and not self.ignore_storage else None - def update_storage(self, deployment: str, benchmark: str, config: dict): + def update_storage(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: + """Update cached storage configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + config (Dict[str, Any]): Storage configuration to cache. + """ if self.ignore_storage: return self._update_resources(deployment, benchmark, "storage", config) - def update_nosql(self, deployment: str, benchmark: str, config: dict): + def update_nosql(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: + """Update cached NoSQL configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + config (Dict[str, Any]): NoSQL configuration to cache. + """ if self.ignore_storage: return self._update_resources(deployment, benchmark, "nosql", config) - def _update_resources(self, deployment: str, benchmark: str, resource: str, config: dict): + def _update_resources(self, deployment: str, benchmark: str, resource: str, config: Dict[str, Any]) -> None: + """Update cached resource configuration for a benchmark. + + This method handles caching of resource configurations (storage, nosql) + for benchmarks. It creates the benchmark directory if it doesn't exist + and updates the configuration file. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + resource (str): Resource type ('storage' or 'nosql'). + config (Dict[str, Any]): Resource configuration to cache. + """ if self.ignore_storage: return - """ - We are now preparing benchmark data before caching function. - Thus, we have to take over a situation where the cache directory does not exist. - """ - benchmark_dir = os.path.join(self.cache_dir, benchmark) os.makedirs(benchmark_dir, exist_ok=True) with self._lock: - if os.path.exists(os.path.join(benchmark_dir, "config.json")): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: + config_file = os.path.join(benchmark_dir, "config.json") + if os.path.exists(config_file): + with open(config_file, "r") as fp: cached_config = json.load(fp) else: cached_config = {} @@ -216,14 +374,26 @@ def _update_resources(self, deployment: str, benchmark: str, resource: str, conf else: cached_config[deployment] = {resource: config} - with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: + with open(config_file, "w") as fp: json.dump(cached_config, fp, indent=2) def add_code_package( self, deployment_name: str, code_package: "Benchmark", - ): + ) -> None: + """Add a new code package to the cache. 
+ + Caches a compiled benchmark code package (either directory or ZIP file) + along with its configuration. Handles both package and container deployments. + + Args: + deployment_name (str): Name of the deployment platform. + code_package (Benchmark): The benchmark code package to cache. + + Raises: + RuntimeError: If cached application already exists for the deployment. + """ with self._lock: language = code_package.language_name language_version = code_package.language_version @@ -331,7 +501,16 @@ def update_code_package( self, deployment_name: str, code_package: "Benchmark", - ): + ) -> None: + """Update an existing code package in the cache. + + Updates cached code package with new content and metadata. If the + cached package doesn't exist, adds it as a new package. + + Args: + deployment_name (str): Name of the deployment platform. + code_package (Benchmark): The benchmark code package to update. + """ with self._lock: language = code_package.language_name language_version = code_package.language_version @@ -392,24 +571,27 @@ def update_code_package( else: self.add_code_package(deployment_name, code_package) - """ - Add new function to cache. - - :param deployment: - :param benchmark: - :param language: - :param code_package: Path to directory/ZIP with code. - :param language_config: Configuration of language and code. - :param storage_config: Configuration of storage buckets. - """ - def add_function( self, deployment_name: str, language_name: str, code_package: "Benchmark", function: "Function", - ): + ) -> None: + """Add new function to cache. + + Caches a deployed function configuration for a benchmark. Links the + function to its corresponding code package. + + Args: + deployment_name (str): Name of the deployment platform. + language_name (str): Programming language name. + code_package (Benchmark): The benchmark code package. + function (Function): The deployed function to cache. + + Raises: + RuntimeError: If code package doesn't exist in cache. + """ if self.ignore_functions: return with self._lock: @@ -436,7 +618,18 @@ def add_function( "Can't cache function {} for a non-existing code package!".format(function.name) ) - def update_function(self, function: "Function"): + def update_function(self, function: "Function") -> None: + """Update an existing function in the cache. + + Updates cached function configuration with new metadata. Searches + across all deployments and languages to find the function by name. + + Args: + function (Function): The function with updated configuration. + + Raises: + RuntimeError: If function's code package doesn't exist in cache. + """ if self.ignore_functions: return with self._lock: diff --git a/sebs/config.py b/sebs/config.py index c3030ea0..6a8d5b56 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -1,3 +1,14 @@ +"""Configuration management for SeBS (Serverless Benchmarking Suite). + +This module provides configuration management functionality for the SeBS framework, +including system configuration loading, Docker image management, and deployment +setting retrieval from the systems.json configuration file. + +The SeBSConfig class serves as the central configuration manager that provides +access to platform-specific settings, language configurations, and deployment +options across different cloud providers and local deployments. +""" + import json from typing import Dict, List, Optional @@ -5,23 +16,67 @@ class SeBSConfig: - def __init__(self): + """Central configuration manager for SeBS framework. 
+ + This class manages all configuration settings for the SeBS benchmarking suite, + including system configurations, Docker settings, deployment options, and + platform-specific parameters. It loads configuration from systems.json and + provides convenient access methods for various configuration aspects. + + Attributes: + _system_config (Dict): The loaded system configuration from systems.json. + _image_tag_prefix (str): Custom prefix for Docker image tags. + """ + + def __init__(self) -> None: + """Initialize SeBSConfig by loading system configuration. + + Loads the systems.json configuration file and initializes the image tag prefix. + + Raises: + FileNotFoundError: If systems.json configuration file is not found. + json.JSONDecodeError: If systems.json contains invalid JSON. + """ with open(project_absolute_path("config", "systems.json"), "r") as cfg: self._system_config = json.load(cfg) self._image_tag_prefix = "" @property def image_tag_prefix(self) -> str: + """Get the current Docker image tag prefix. + + Returns: + str: The current image tag prefix. + """ return self._image_tag_prefix @image_tag_prefix.setter - def image_tag_prefix(self, tag: str): + def image_tag_prefix(self, tag: str) -> None: + """Set the Docker image tag prefix. + + Args: + tag (str): The prefix to use for Docker image tags. + """ self._image_tag_prefix = tag def docker_repository(self) -> str: + """Get the Docker repository name from configuration. + + Returns: + str: The Docker repository name configured in systems.json. + """ return self._system_config["general"]["docker_repository"] def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: + """Get deployment packages for a specific deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + Dict[str, str]: Dictionary mapping package names to their versions. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "packages" ] @@ -29,42 +84,118 @@ def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[ def deployment_module_packages( self, deployment_name: str, language_name: str ) -> Dict[str, str]: + """Get deployment module packages for a specific deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + Dict[str, str]: Dictionary mapping module package names to their versions. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "module_packages" ] def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: + """Get deployment files list for a specific deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + List[str]: List of required deployment files. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "files" ] def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: + """Get available Docker image types for a deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). 
+ language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + List[str]: List of available Docker image types. + """ return self._system_config[deployment_name]["languages"][language_name]["images"] def supported_language_versions( self, deployment_name: str, language_name: str, architecture: str ) -> List[str]: + """Get supported language versions for a deployment, language, and architecture. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + architecture (str): Target architecture (e.g., 'x64', 'arm64'). + + Returns: + List[str]: List of supported language versions. + """ languages = self._system_config.get(deployment_name, {}).get("languages", {}) base_images = languages.get(language_name, {}).get("base_images", {}) return list(base_images.get(architecture, {}).keys()) def supported_architecture(self, deployment_name: str) -> List[str]: + """Get supported architectures for a deployment platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + List[str]: List of supported architectures (e.g., ['x64', 'arm64']). + """ return self._system_config[deployment_name]["architecture"] def supported_package_deployment(self, deployment_name: str) -> bool: + """Check if package-based deployment is supported for a platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + bool: True if package deployment is supported, False otherwise. + """ return "package" in self._system_config[deployment_name]["deployments"] def supported_container_deployment(self, deployment_name: str) -> bool: + """Check if container-based deployment is supported for a platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + bool: True if container deployment is supported, False otherwise. + """ return "container" in self._system_config[deployment_name]["deployments"] def benchmark_base_images( self, deployment_name: str, language_name: str, architecture: str ) -> Dict[str, str]: + """Get base Docker images for benchmarks on a specific platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + architecture (str): Target architecture (e.g., 'x64', 'arm64'). + + Returns: + Dict[str, str]: Dictionary mapping language versions to base image names. + """ return self._system_config[deployment_name]["languages"][language_name]["base_images"][ architecture ] def version(self) -> str: + """Get the SeBS framework version. + + Returns: + str: The SeBS version string, or 'unknown' if not configured. + """ return self._system_config["general"].get("SeBS_version", "unknown") def benchmark_image_name( @@ -76,7 +207,19 @@ def benchmark_image_name( architecture: str, registry: Optional[str] = None, ) -> str: - + """Generate full Docker image name for a benchmark. + + Args: + system (str): Deployment system name (e.g., 'aws', 'azure'). + benchmark (str): Benchmark name (e.g., '110.dynamic-html'). + language_name (str): Programming language name (e.g., 'python'). + language_version (str): Language version (e.g., '3.8'). + architecture (str): Target architecture (e.g., 'x64'). + registry (Optional[str]): Docker registry URL. If None, uses default repository. 
+ + Returns: + str: Complete Docker image name including registry and tag. + """ tag = self.benchmark_image_tag( system, benchmark, language_name, language_version, architecture ) @@ -94,6 +237,21 @@ def benchmark_image_tag( language_version: str, architecture: str, ) -> str: + """Generate Docker image tag for a benchmark. + + Creates a standardized tag format that includes system, benchmark, language, + version, architecture, optional prefix, and SeBS version. + + Args: + system (str): Deployment system name (e.g., 'aws', 'azure'). + benchmark (str): Benchmark name (e.g., '110.dynamic-html'). + language_name (str): Programming language name (e.g., 'python'). + language_version (str): Language version (e.g., '3.8'). + architecture (str): Target architecture (e.g., 'x64'). + + Returns: + str: Generated Docker image tag. + """ tag = f"function.{system}.{benchmark}.{language_name}-{language_version}-{architecture}" if self.image_tag_prefix: tag = f"{tag}-{self.image_tag_prefix}" @@ -102,4 +260,13 @@ def benchmark_image_tag( return tag def username(self, deployment_name: str, language_name: str) -> str: + """Get the username for a specific deployment and language configuration. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + str: The username configured for the deployment and language combination. + """ return self._system_config[deployment_name]["languages"][language_name]["username"] diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 86576f11..ec30bab4 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -1,17 +1,51 @@ -from typing import List +"""Environment management for experiment execution. -from sebs.utils import execute +This module provides the ExperimentEnvironment class for managing CPU settings +and system configuration during benchmark experiments. It handles: -""" - Assumes that all cores are online in the beginning. - TODO: use lscpu to discover online cores +- CPU frequency scaling and governor management +- Hyperthreading control (enable/disable) +- CPU boost control +- Memory management (page cache dropping) +- Intel CPU-specific optimizations + +Currently supports only Intel CPUs with the intel_pstate driver. - Currently supports only Intel CPUs with intel_pstate driver. +Note: + This module assumes that all CPU cores are online at initialization. + Future versions should use lscpu to discover online cores dynamically. """ +from typing import Dict, List + +from sebs.utils import execute + class ExperimentEnvironment: - def __init__(self): + """Environment management for benchmark experiments. + + This class provides methods to control CPU settings, memory management, + and other system configurations that can affect benchmark results. + It focuses on creating a stable, reproducible environment for experiments. + + Attributes: + _cpu_mapping: Dictionary mapping physical cores to logical cores + _vendor: CPU vendor identifier (currently only "intel" supported) + _governor: CPU frequency scaling governor (e.g., "intel_pstate") + _prev_boost_status: Previous boost status for restoration + _prev_min_freq: Previous minimum frequency setting for restoration + """ + def __init__(self) -> None: + """Initialize the experiment environment. + + Discovers CPU topology, checks vendor compatibility, and verifies + the CPU frequency scaling driver. 
Currently only supports Intel CPUs + with the intel_pstate driver. + + Raises: + NotImplementedError: If CPU vendor is not Intel or scaling driver + is not intel_pstate + """ # find CPU mapping ret = execute('cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True) # skip empty line at the end @@ -28,7 +62,7 @@ def __init__(self): for cpu_id in range(1, number_of_cores) ] - self._cpu_mapping = {} + self._cpu_mapping: Dict[int, List[Dict[str, int]]] = {} # iterate over every two elements i na list for logical_core, physical_core in zip(*[iter(mapping)] * 2): core_description = { @@ -42,7 +76,7 @@ def __init__(self): vendor = execute('lscpu | grep -e "Vendor ID"', shell=True).split(";")[1] if vendor == "GenuineIntel": - self._vendor = "intel" + self._vendor: str = "intel" else: raise NotImplementedError() @@ -50,11 +84,17 @@ def __init__(self): scaling_governor_path = "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" governor = execute("cat {path}".format(path=scaling_governor_path)) if governor == "intel_pstate": - self._governor = governor + self._governor: str = governor else: raise NotImplementedError() - def write_cpu_status(self, cores: List[int], status: int): + def write_cpu_status(self, cores: List[int], status: int) -> None: + """Write CPU online status for specified cores. + + Args: + cores: List of physical core IDs to modify + status: Status to set (0 for offline, 1 for online) + """ cpu_status_path = "/sys/devices/system/cpu/cpu{cpu_id}/online" for core in cores: @@ -66,13 +106,31 @@ def write_cpu_status(self, cores: List[int], status: int): shell=True, ) - def disable_hyperthreading(self, cores: List[int]): + def disable_hyperthreading(self, cores: List[int]) -> None: + """Disable hyperthreading for specified cores. + + Args: + cores: List of physical core IDs to disable hyperthreading for + """ self.write_cpu_status(cores, 0) - def enable_hyperthreading(self, cores: List[int]): + def enable_hyperthreading(self, cores: List[int]) -> None: + """Enable hyperthreading for specified cores. + + Args: + cores: List of physical core IDs to enable hyperthreading for + """ self.write_cpu_status(cores, 1) - def disable_boost(self, cores: List[int]): + def disable_boost(self, cores: List[int]) -> None: + """Disable CPU boost (turbo) for specified cores. + + Args: + cores: List of physical core IDs to disable boost for + + Raises: + NotImplementedError: If CPU governor is not intel_pstate + """ if self._governor == "intel_pstate": boost_path = "/sys/devices/system/cpu/intel_pstate" self._prev_boost_status = execute("cat " + boost_path) @@ -80,7 +138,17 @@ def disable_boost(self, cores: List[int]): else: raise NotImplementedError() - def enable_boost(self, cores: List[int]): + def enable_boost(self, cores: List[int]) -> None: + """Enable CPU boost (turbo) for specified cores. + + Restores the previous boost status that was saved when boost was disabled. + + Args: + cores: List of physical core IDs to enable boost for + + Raises: + NotImplementedError: If CPU governor is not intel_pstate + """ if self._governor == "intel_pstate": boost_path = "/sys/devices/system/cpu/intel_pstate" execute( @@ -91,25 +159,63 @@ def enable_boost(self, cores: List[int]): else: raise NotImplementedError() - def drop_page_cache(self): + def drop_page_cache(self) -> None: + """Drop system page cache to ensure clean memory state. + + This method clears the page cache to prevent cached data from + affecting benchmark measurements. 
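+
+ Note:
+ The cache is dropped by piping through "sudo tee", so the user running
+ the benchmarks needs sudo rights (ideally passwordless) for this call to succeed.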
+ """ execute("echo 3 | sudo tee /proc/sys/vm/drop_caches") - def set_frequency(self, max_freq: int): + def set_frequency(self, max_freq: int) -> None: + """Set minimum CPU frequency percentage. + + Args: + max_freq: Minimum frequency percentage (0-100) + """ path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" self._prev_min_freq = execute("cat " + path) execute("echo {freq} | sudo tee {path}".format(freq=max_freq, path=path)) - def unset_frequency(self): + def unset_frequency(self) -> None: + """Restore previous minimum CPU frequency setting. + + Restores the frequency setting that was saved when set_frequency + was called. + """ path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" execute("echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path)) - def setup_benchmarking(self, cores: List[int]): + def setup_benchmarking(self, cores: List[int]) -> None: + """Set up the environment for stable benchmarking. + + This method applies a standard set of optimizations to create + a stable environment for benchmarking: + - Disables CPU boost/turbo + - Disables hyperthreading + - Sets CPU frequency to maximum + - Drops page cache + + Args: + cores: List of physical core IDs to configure + """ self.disable_boost(cores) self.disable_hyperthreading(cores) self.set_frequency(100) self.drop_page_cache() - def after_benchmarking(self, cores: List[int]): + def after_benchmarking(self, cores: List[int]) -> None: + """Restore environment settings after benchmarking. + + This method restores the system to its previous state after + benchmarking is complete: + - Re-enables CPU boost/turbo + - Re-enables hyperthreading + - Restores frequency settings + + Args: + cores: List of physical core IDs to restore + """ self.enable_boost(cores) self.enable_hyperthreading(cores) self.unset_frequency() diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 766a7615..dc3c231c 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -102,7 +102,7 @@ def typename() -> str: return "Experiment.EvictionModel" @staticmethod - def accept_replies(port: int, invocations: int): + def accept_replies(port: int, invocations: int) -> None: """Accept TCP connections from functions and respond to them. This static method acts as a TCP server, accepting connections from @@ -153,7 +153,26 @@ def accept_replies(port: int, invocations: int): s.close() @staticmethod - def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict): + def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict) -> dict: + """Execute a single instance of the eviction model test. + + This method performs two invocations of a function with a sleep interval + between them. The first invocation should be a cold start, and the second + will indicate whether the container was evicted during the sleep period. 
+ + Args: + sleep_time: Time to sleep between invocations (seconds) + pid: Process ID for logging + tid: Thread ID for logging + func: Function to invoke + payload: Payload to send to the function + + Returns: + Dictionary with invocation results and timing information + + Raises: + RuntimeError: If the first invocation fails + """ try: print(f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!") @@ -171,7 +190,7 @@ def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payloa time_spent = float(datetime.now().strftime("%s.%f")) - float(end.strftime("%s.%f")) seconds_sleep = sleep_time - time_spent - print(f"PID {pid} TID {tid} with time {time}, sleep {seconds_sleep}") + print(f"PID {pid} TID {tid} with time {sleep_time}, sleep {seconds_sleep}") time.sleep(seconds_sleep) try: @@ -197,7 +216,27 @@ def process_function( functions: List[Function], times: List[int], payload: dict, - ): + ) -> List[dict]: + """Process a function with multiple time intervals. + + This method executes multiple functions with different sleep times + in parallel, starting with the largest sleep time to overlap executions. + The total time should be equal to the maximum execution time. + + Args: + repetition: Current repetition number + pid: Process ID for logging + invocations: Number of invocations to perform + functions: List of functions to invoke + times: List of sleep times corresponding to functions + payload: Payload to send to functions + + Returns: + List of dictionaries containing invocation results + + Raises: + RuntimeError: If any execution fails + """ b = multiprocessing.Semaphore(invocations) print(f"Begin at PID {pid}, repetition {repetition}") @@ -234,7 +273,7 @@ def process_function( raise RuntimeError() return final_results - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. This method sets up the benchmark, functions, and output directory for @@ -274,7 +313,17 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): # continue self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) - def run(self): + def run(self) -> None: + """Execute the eviction model experiment. + + This method runs the main eviction model experiment by: + 1. Setting up server instances to handle function responses + 2. Executing parallel invocations with different sleep times + 3. Collecting and storing results + + The experiment determines container eviction patterns by measuring + whether functions experience cold starts after different idle periods. 
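+
+ Example (illustrative usage sketch; assumes "sebs_client" and "deployment_client"
+ have already been initialized elsewhere):
+ experiment.prepare(sebs_client, deployment_client)
+ experiment.run()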
+ """ settings = self.config.experiment_settings(self.name()) invocations = settings["invocations"] diff --git a/sebs/experiments/experiment.py b/sebs/experiments/experiment.py index 5d50ead6..34e775d5 100644 --- a/sebs/experiments/experiment.py +++ b/sebs/experiments/experiment.py @@ -13,8 +13,7 @@ - Type and name identification for experiments """ -from abc import ABC -from abc import abstractmethod +from abc import ABC, abstractmethod from multiprocessing import Semaphore # from multiprocessing.pool import ThreadPool diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 083e6bd4..dfeb5e39 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -18,7 +18,7 @@ import random import time from datetime import datetime -from typing import Dict, TYPE_CHECKING +from typing import Dict, List, TYPE_CHECKING from sebs.benchmark import Benchmark from sebs.faas.system import System as FaaSSystem @@ -76,7 +76,13 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings self._deployment_client = deployment_client self._benchmark = benchmark - def before_sample(self, size: int, input_benchmark: dict): + def before_sample(self, size: int, input_benchmark: dict) -> None: + """Prepare the benchmark with a specific code package size. + + Args: + size: Size of the code package to create + input_benchmark: Benchmark input configuration (unused) + """ arr = bytearray((random.getrandbits(8) for i in range(size))) self._benchmark.code_package_modify("randomdata.bin", bytes(arr)) function = self._deployment_client.get_function(self._benchmark) @@ -84,7 +90,21 @@ def before_sample(self, size: int, input_benchmark: dict): class PayloadSize: - def __init__(self, settings: dict): + """Helper class for payload size experiments. + + This class handles creating different payload sizes to measure the impact + of input data size on function invocation overhead. + + Attributes: + pts: List of payload sizes to test + """ + def __init__(self, settings: dict) -> None: + """Initialize a new payload size experiment. + + Args: + settings: Experiment settings with payload_begin, payload_end, + and payload_points values + """ from numpy import linspace points = linspace( @@ -94,7 +114,13 @@ def __init__(self, settings: dict): ) self.pts = [int(pt) for pt in points] - def before_sample(self, size: int, input_benchmark: dict): + def before_sample(self, size: int, input_benchmark: dict) -> None: + """Prepare the benchmark input with a specific payload size. + + Args: + size: Size of the payload to create + input_benchmark: Benchmark input configuration to modify + """ import base64 from io import BytesIO @@ -131,7 +157,7 @@ def __init__(self, config: ExperimentConfig): super().__init__(config) self.settings = self.config.experiment_settings(self.name()) - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. This method sets up the benchmark, function, storage, and output directory @@ -177,7 +203,15 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._deployment_client = deployment_client - def run(self): + def run(self) -> None: + """Execute the invocation overhead experiment. + + This method runs the main experiment by: + 1. Setting up either code package size or payload size experiments + 2. Running warm-up and cold start invocations + 3. 
Measuring invocation overhead for different sizes + 4. Collecting and storing results in CSV format + """ from requests import get @@ -250,7 +284,20 @@ def process( directory: str, logging_filename: str, extend_time_interval: int, - ): + ) -> None: + """Process experiment results and generate summary statistics. + + This method processes the raw experiment results by: + 1. Loading timing data from CSV files + 2. Computing clock drift and round-trip time + 3. Creating a processed results file with invocation times + + Args: + sebs_client: SeBS client instance + deployment_client: Deployment client instance + directory: Directory containing experiment results + logging_filename: Name of the logging file (unused) + """ import pandas as pd import glob from sebs import SeBS # noqa @@ -312,7 +359,27 @@ def process( invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) - def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str): + def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str) -> List: + """Receive UDP datagrams from the function for clock synchronization. + + This method implements a UDP server that communicates with the function + to measure clock synchronization and network timing. It receives + datagrams from the function and responds to them, measuring timing + information. + + Args: + input_benchmark: Benchmark input configuration + repetitions: Number of repetitions to perform + port: UDP port to listen on + ip: IP address of the client + + Returns: + List containing invocation results: [is_cold, connection_time, + start_timestamp, finish_timestamp, request_id] + + Raises: + RuntimeError: If function invocation fails + """ import socket diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index d4995ae1..a65cf521 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -58,7 +58,7 @@ def __init__(self, config: ExperimentConfig): """ super().__init__(config) - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. This method sets up the benchmark, function, triggers, storage, and output @@ -96,7 +96,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): if len(triggers) == 0: deployment_client.create_trigger(self._function, Trigger.TriggerType.HTTP) - def run(self): + def run(self) -> None: """Run the network ping-pong experiment. This method executes the experiment, measuring network latency and @@ -123,7 +123,7 @@ def run(self): time.sleep(5) self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) - def process(self, directory: str): + def process(self, directory: str) -> None: """Process the experiment results. This method processes the CSV files generated during the experiment @@ -158,7 +158,7 @@ def process(self, directory: str): fig = ax.get_figure() fig.savefig(os.path.join(directory, "histogram.png")) - def receive_datagrams(self, repetitions: int, port: int, ip: str): + def receive_datagrams(self, repetitions: int, port: int, ip: str) -> None: """Receive UDP datagrams from the function and respond to them. 
This method acts as a UDP server, receiving datagrams from the function diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index fdb61a6b..d3018547 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -103,7 +103,7 @@ def str(self) -> str: """ return self.name.lower() - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. This method sets up the benchmark, function, trigger, and output @@ -148,7 +148,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._deployment_client = deployment_client self._sebs_client = sebs_client - def run(self): + def run(self) -> None: """Run the experiment. This method runs the experiment with the configured settings. @@ -177,7 +177,7 @@ def run(self): # Run experiment with this memory configuration self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) - def compute_statistics(self, times: List[float]): + def compute_statistics(self, times: List[float]) -> None: """Compute statistical analysis of execution times. This method computes basic statistics (mean, median, standard deviation, @@ -221,7 +221,7 @@ def _run_configuration( invocations: int, repetitions: int, suffix: str = "", - ): + ) -> None: """Run a specific experiment configuration. This method executes the experiment with the specified run type, @@ -345,7 +345,7 @@ def _run_configuration( ) ) - def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): + def run_configuration(self, settings: dict, repetitions: int, suffix: str = "") -> None: """Run experiments for each configured experiment type. This method runs the experiment for each experiment type specified @@ -404,7 +404,7 @@ def process( directory: str, logging_filename: str, extend_time_interval: int, - ): + ) -> None: """Process experiment results and generate a CSV report. This method processes the experiment results, downloads additional diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 9a8d22f2..0c5981ec 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -69,21 +69,21 @@ def __init__( self._metrics = metrics self.result_bucket = result_bucket - def begin(self): + def begin(self) -> None: """Mark the beginning of the experiment. This method records the start time of the experiment. """ self.begin_time = datetime.now().timestamp() - def end(self): + def end(self) -> None: """Mark the end of the experiment. This method records the end time of the experiment. """ self.end_time = datetime.now().timestamp() - def times(self) -> Tuple[int, int]: + def times(self) -> Tuple[float, float]: """Get the start and end times of the experiment. Returns: @@ -91,7 +91,7 @@ def times(self) -> Tuple[int, int]: """ return self.begin_time, self.end_time - def add_result_bucket(self, result_bucket: str): + def add_result_bucket(self, result_bucket: str) -> None: """Set the result bucket for storing experiment results. Args: @@ -99,7 +99,7 @@ def add_result_bucket(self, result_bucket: str): """ self.result_bucket = result_bucket - def add_invocation(self, func: Function, invocation: ExecutionResult): + def add_invocation(self, func: Function, invocation: ExecutionResult) -> None: """Add an invocation result for a specific function. 
If the invocation doesn't have a request ID (likely due to failure), diff --git a/sebs/experiments/startup_time.py b/sebs/experiments/startup_time.py index 3b7e9520..203c7ae5 100644 --- a/sebs/experiments/startup_time.py +++ b/sebs/experiments/startup_time.py @@ -1,15 +1,94 @@ +"""Startup time measurement experiment implementation. + +This module provides the StartupTime experiment implementation, which measures +the startup and initialization time of serverless functions. This experiment +focuses on measuring: + +- Cold start initialization time +- Container startup overhead +- Runtime initialization time +- Language-specific startup costs + +The experiment is designed to isolate and measure the time it takes for +a serverless platform to initialize a new container and runtime environment. +""" + +from typing import TYPE_CHECKING + from sebs.experiments.experiment import Experiment from sebs.experiments.config import Config as ExperimentConfig +if TYPE_CHECKING: + from sebs import SeBS + from sebs.faas.system import System as FaaSSystem + class StartupTime(Experiment): - def __init__(self, config: ExperimentConfig): + """Startup time measurement experiment. + + This experiment measures the startup and initialization time of serverless + functions, focusing on cold start performance. It isolates the time spent + in container initialization, runtime startup, and function loading. + + The experiment can be used to compare startup times across different: + - Programming languages and runtimes + - Memory configurations + - Code package sizes + - Platform configurations + + Attributes: + config: Experiment configuration settings + """ + def __init__(self, config: ExperimentConfig) -> None: + """Initialize a new StartupTime experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "startup-time" + """ return "startup-time" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.StartupTime" + """ return "Experiment.StartupTime" + + def prepare(self, sebs_client: "SeBS", deployment_client: "FaaSSystem") -> None: + """Prepare the experiment for execution. + + This method sets up the experiment by preparing the benchmark function + and configuring the necessary resources for measuring startup time. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + + Note: + This experiment is currently a placeholder and needs implementation. + """ + # TODO: Implement startup time experiment preparation + pass + + def run(self) -> None: + """Execute the startup time experiment. + + This method runs the experiment to measure function startup times, + enforcing cold starts and measuring initialization overhead. + + Note: + This experiment is currently a placeholder and needs implementation. + """ + # TODO: Implement startup time experiment execution + pass diff --git a/sebs/gcp/__init__.py b/sebs/gcp/__init__.py index f76e7c75..032b66b6 100644 --- a/sebs/gcp/__init__.py +++ b/sebs/gcp/__init__.py @@ -1,3 +1,44 @@ +"""Google Cloud Platform (GCP) integration for SeBS. + +This package provides comprehensive Google Cloud Platform support for the +Serverless Benchmarking Suite, including Cloud Functions deployment, Cloud Storage +for object storage, Firestore/Datastore for NoSQL operations, and Cloud Monitoring +for performance metrics collection. 
+ +The package includes: +- Function deployment and management via Cloud Functions API +- Object storage through Google Cloud Storage buckets +- NoSQL database operations using Firestore in Datastore mode +- Performance monitoring via Cloud Monitoring and Cloud Logging +- Docker-based gcloud CLI integration for administrative operations +- Comprehensive credential and resource management + +Modules: + gcp: Main GCP system implementation + config: Configuration and credential management + storage: Cloud Storage integration + function: Cloud Function representation + triggers: Function invocation triggers + datastore: Firestore/Datastore NoSQL implementation + resources: System resource management + cli: gcloud CLI integration + +Example: + Basic GCP system setup: + + from sebs.gcp import GCP, GCPConfig + + # Configure GCP with credentials + config = GCPConfig.deserialize(config_dict, cache, handlers) + + # Initialize GCP system + gcp_system = GCP(system_config, config, cache, docker_client, handlers) + gcp_system.initialize() + + # Deploy a function + function = gcp_system.create_function(benchmark, "my-function", False, "") +""" + from .gcp import GCP # noqa from .config import GCPConfig # noqa from .storage import GCPStorage # noqa diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index 65ca33bc..c39311aa 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -1,5 +1,24 @@ +"""Google Cloud CLI integration for SeBS. + +This module provides a Docker-based Google Cloud CLI interface for performing +administrative operations that require the gcloud command-line tool. It manages +a containerized gcloud environment with proper authentication and project setup. + +Classes: + GCloudCLI: Docker-based gcloud CLI interface for GCP operations + +Example: + Using the gcloud CLI interface: + + cli = GCloudCLI(credentials, system_config, docker_client) + cli.login(project_name) + result = cli.execute("gcloud functions list") + cli.shutdown() +""" + import logging import os +from typing import Union import docker @@ -9,13 +28,40 @@ class GCloudCLI(LoggingBase): + """Docker-based Google Cloud CLI interface. + + Provides a containerized environment for executing gcloud commands with + proper authentication and project configuration. Uses a Docker container + with the gcloud CLI pre-installed and configured. + + Attributes: + docker_instance: Running Docker container with gcloud CLI + """ @staticmethod def typename() -> str: + """Get the type name for this CLI implementation. + + Returns: + Type name string for GCP CLI + """ return "GCP.CLI" def __init__( self, credentials: GCPCredentials, system_config: SeBSConfig, docker_client: docker.client - ): + ) -> None: + """Initialize the gcloud CLI Docker container. + + Sets up a Docker container with the gcloud CLI, pulling the image if needed + and mounting the GCP credentials file for authentication. + + Args: + credentials: GCP credentials with service account file path + system_config: SeBS system configuration + docker_client: Docker client for container management + + Raises: + RuntimeError: If Docker image pull fails + """ super().__init__() @@ -56,12 +102,18 @@ def __init__( # except StopIteration: # pass - """ - Execute the given command in Azure CLI. - Throws an exception on failure (commands are expected to execute succesfully). - """ - - def execute(self, cmd: str): + def execute(self, cmd: str) -> bytes: + """Execute a command in the gcloud CLI container. 
+ + Args: + cmd: Command string to execute in the container + + Returns: + Command output as bytes + + Raises: + RuntimeError: If the command fails (non-zero exit code) + """ exit_code, out = self.docker_instance.exec_run(cmd) if exit_code != 0: raise RuntimeError( @@ -71,27 +123,28 @@ def execute(self, cmd: str): ) return out - """ - Run gcloud auth command on Docker instance. - - Important: we cannot run "init" as this always requires authenticating through a browser. - Instead, we authenticate as a service account. - - Setting cloud project will show a warning about missing permissions - for Cloud Resource Manager API: I don't know why, we don't seem to need it. - - Because of that, it will ask for verification to continue - which we do by passing "Y". - """ - - def login(self, project_name: str): + def login(self, project_name: str) -> None: + """Authenticate gcloud CLI and set the active project. + + Performs service account authentication using the mounted credentials file + and sets the specified project as the active project. Automatically confirms + any prompts that may appear during project setup. + + Args: + project_name: GCP project ID to set as active + + Note: + Uses service account authentication instead of browser-based auth. + May show warnings about Cloud Resource Manager API permissions. + """ self.execute("gcloud auth login --cred-file=/credentials.json") self.execute(f"/bin/bash -c 'gcloud config set project {project_name} <<< Y'") self.logging.info("gcloud CLI login succesful") - """ - Shuts down the Docker instance. - """ - - def shutdown(self): + def shutdown(self) -> None: + """Shutdown the gcloud CLI Docker container. + + Stops and removes the Docker container used for gcloud operations. + """ self.logging.info("Stopping gcloud CLI manage Docker instance") self.docker_instance.stop() diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c4..4dfff710 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,6 +1,31 @@ +"""Configuration classes for Google Cloud Platform (GCP) integration. + +This module provides configuration classes for GCP serverless benchmarking, +including credentials management, resource allocation, and cloud region configuration. +It handles authentication through service account JSON files and manages project-specific +settings required for Cloud Functions deployment and execution. + +The module supports multiple credential sources in priority order: +1. User-provided credentials in configuration +2. Cached credentials from previous sessions +3. 
Environment variables (GOOGLE_APPLICATION_CREDENTIALS, GCP_SECRET_APPLICATION_CREDENTIALS) + +Classes: + GCPCredentials: Handles authentication and project identification + GCPResources: Manages allocated cloud resources + GCPConfig: Main configuration container for GCP deployment + +Example: + Basic GCP configuration setup: + + credentials = GCPCredentials("/path/to/service-account.json") + resources = GCPResources() + config = GCPConfig(credentials, resources) +""" + import json import os -from typing import cast, List, Optional, Tuple +from typing import cast, Dict, List, Optional, Tuple from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources @@ -9,20 +34,36 @@ # FIXME: Replace type hints for static generators after migration to 3.7 # https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel -""" - Credentials for FaaS system used to authorize operations on functions - and other resources. - - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. -""" - class GCPCredentials(Credentials): - def __init__(self, gcp_credentials: str): + """Credentials manager for Google Cloud Platform authentication. + + Handles authentication to GCP services using service account JSON files. + Automatically extracts project ID from credentials and manages environment + variable setup for Google Cloud SDK authentication. + + The class supports multiple credential sources in priority order: + 1. User-provided credentials file path + 2. Cached credentials from previous sessions + 3. GOOGLE_APPLICATION_CREDENTIALS environment variable + 4. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + + Attributes: + _gcp_credentials: Path to the service account JSON file + _project_id: GCP project ID extracted from credentials + """ + + def __init__(self, gcp_credentials: str) -> None: + """Initialize GCP credentials with service account file. + + Args: + gcp_credentials: Path to the GCP service account JSON file + + Raises: + FileNotFoundError: If the credentials file doesn't exist + json.JSONDecodeError: If the credentials file is not valid JSON + KeyError: If the credentials file doesn't contain project_id + """ super().__init__() self._gcp_credentials = gcp_credentials @@ -32,18 +73,56 @@ def __init__(self, gcp_credentials: str): @property def gcp_credentials(self) -> str: + """Get the path to the GCP service account credentials file. + + Returns: + Path to the service account JSON file + """ return self._gcp_credentials @property def project_name(self) -> str: + """Get the GCP project ID from the credentials. + + Returns: + The GCP project ID string + """ return self._project_id @staticmethod def initialize(gcp_credentials: str) -> "GCPCredentials": + """Create a new GCPCredentials instance. + + Args: + gcp_credentials: Path to the GCP service account JSON file + + Returns: + A new GCPCredentials instance + """ return GCPCredentials(gcp_credentials) @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Deserialize GCP credentials from configuration and cache. + + Loads credentials from multiple sources in priority order: + 1. 
User-provided config with credentials-json path + 2. Cached credentials from previous sessions + 3. GOOGLE_APPLICATION_CREDENTIALS environment variable + 4. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + + Args: + config: Configuration dictionary potentially containing credentials + cache: Cache instance for storing/retrieving credentials + handlers: Logging handlers for error reporting + + Returns: + Initialized GCPCredentials instance + + Raises: + RuntimeError: If no valid credentials are found or if project ID + mismatch occurs between cache and new credentials + """ cached_config = cache.get_config("gcp") ret: GCPCredentials @@ -84,46 +163,80 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret - """ - Serialize to JSON for storage in cache. - """ - - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize credentials to dictionary for cache storage. + + Returns: + Dictionary containing project_id for cache storage + """ out = {"project_id": self._project_id} return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current credential information. + + Args: + cache: Cache instance to update with project ID + """ cache.update_config(val=self._project_id, keys=["gcp", "credentials", "project_id"]) -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. Examples might include IAM roles and API gateways - for HTTP triggers. - - Storage resources are handled seperately. -""" - - class GCPResources(Resources): - def __init__(self): + """Resource manager for Google Cloud Platform serverless resources. + + Manages cloud resources allocated for function execution and deployment, + such as IAM roles, API gateways for HTTP triggers, and other GCP-specific + infrastructure components. Storage resources are handled separately. + + This class extends the base Resources class with GCP-specific resource + management capabilities and handles serialization/deserialization for + cache persistence. + + Attributes: + Inherits all attributes from the base Resources class + """ + def __init__(self) -> None: + """Initialize GCP resources manager.""" super().__init__(name="gcp") @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: Dict) -> "GCPResources": + """Initialize GCP resources from a dictionary configuration. + + Args: + res: Base Resources instance to initialize + dct: Dictionary containing resource configuration + + Returns: + Initialized GCPResources instance + """ ret = cast(GCPResources, res) super(GCPResources, GCPResources).initialize(ret, dct) return ret - """ - Serialize to JSON for storage in cache. - """ - - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize resources to dictionary for cache storage. + + Returns: + Dictionary representation of resources for cache storage + """ return super().serialize() @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + """Deserialize GCP resources from configuration and cache. + + Loads resources from cache if available, otherwise initializes from + user configuration or creates empty resource set. 
+ + Args: + config: Configuration dictionary potentially containing resources + cache: Cache instance for storing/retrieving resources + handlers: Logging handlers for status reporting + + Returns: + Initialized GCPResources instance + """ cached_config = cache.get_config("gcp") ret = GCPResources() @@ -144,43 +257,97 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current resource information. + + Args: + cache: Cache instance to update with resource data + """ super().update_cache(cache) -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" - - class GCPConfig(Config): + """Main configuration class for Google Cloud Platform deployment. + + Combines credentials and resources into a complete configuration for + GCP serverless function deployment. Manages cloud region settings, + authentication, and resource allocation for the benchmarking suite. + + This class handles serialization/deserialization for cache persistence + and provides validation for configuration consistency across sessions. + + Attributes: + _project_name: GCP project identifier + _region: GCP region for resource deployment + _credentials: GCP authentication credentials + _resources: Allocated GCP resources + """ _project_name: str - def __init__(self, credentials: GCPCredentials, resources: GCPResources): + def __init__(self, credentials: GCPCredentials, resources: GCPResources) -> None: + """Initialize GCP configuration with credentials and resources. + + Args: + credentials: GCP authentication credentials + resources: GCP resource allocation settings + """ super().__init__(name="gcp") self._credentials = credentials self._resources = resources @property def region(self) -> str: + """Get the GCP region for resource deployment. + + Returns: + GCP region identifier (e.g., 'us-central1') + """ return self._region @property def project_name(self) -> str: + """Get the GCP project name from credentials. + + Returns: + GCP project identifier string + """ return self.credentials.project_name @property def credentials(self) -> GCPCredentials: + """Get the GCP credentials instance. + + Returns: + GCP authentication credentials + """ return self._credentials @property def resources(self) -> GCPResources: + """Get the GCP resources instance. + + Returns: + GCP resource allocation settings + """ return self._resources @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + """Deserialize GCP configuration from dictionary and cache. + + Loads complete GCP configuration including credentials and resources. + Validates consistency between cached and provided configuration values, + updating cache with new user-provided values when they differ. 
+ + Args: + config: Configuration dictionary with GCP settings + cache: Cache instance for storing/retrieving configuration + handlers: Logging handlers for status reporting + + Returns: + Initialized GCPConfig instance + """ cached_config = cache.get_config("gcp") credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) @@ -213,11 +380,23 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi return config_obj @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: Dict) -> None: + """Initialize GCP configuration from dictionary. + + Args: + cfg: Config instance to initialize (will be cast to GCPConfig) + dct: Dictionary containing configuration values including region + """ config = cast(GCPConfig, cfg) config._region = dct["region"] - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize configuration to dictionary for cache storage. + + Returns: + Dictionary containing complete GCP configuration including + name, region, credentials, and resources + """ out = { "name": "gcp", "region": self._region, @@ -226,7 +405,14 @@ def serialize(self) -> dict: } return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current configuration values. + + Updates region, credentials, and resources in the cache. + + Args: + cache: Cache instance to update with configuration data + """ cache.update_config(val=self.region, keys=["gcp", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index ae747fb1..8605e075 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -1,3 +1,21 @@ +"""Google Cloud Datastore/Firestore implementation for SeBS NoSQL storage. + +This module provides NoSQL database functionality using Google Cloud Firestore +in Datastore mode. It manages database allocation, table creation, and data +operations for benchmarks requiring NoSQL storage capabilities. + +Classes: + BenchmarkResources: Resource configuration for benchmark databases + Datastore: NoSQL storage implementation using Google Cloud Firestore + +Example: + Using Datastore for benchmark NoSQL operations: + + datastore = Datastore(cli_instance, cache, resources, region) + table_name = datastore.create_table("benchmark-name", "user-data", "user_id") + datastore.write_to_table("benchmark-name", table_name, data, primary_key, secondary_key) +""" + from dataclasses import dataclass from typing import Dict, List, Tuple, Optional @@ -11,32 +29,84 @@ @dataclass class BenchmarkResources: + """Resource configuration for a benchmark's Datastore database. + + Tracks the allocated database name, table kinds, and client instance + for a specific benchmark's NoSQL storage requirements. + + Attributes: + database: Name of the Firestore database in Datastore mode + kinds: List of entity kinds (table equivalents) in the database + database_client: Optional Datastore client instance (allocated dynamically) + """ database: str kinds: List[str] # We allocate this dynamically - ignore when caching database_client: Optional[datastore.Client] = None - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize benchmark resources for cache storage. 
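+
+ The serialized form is {"database": <database name>, "kinds": [<kind>, ...]},
+ i.e. exactly the structure that deserialize() expects back.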
+ + Returns: + Dictionary containing database name and kinds list + """ return {"database": self.database, "kinds": self.kinds} @staticmethod - def deserialize(config: dict) -> "BenchmarkResources": + def deserialize(config: Dict) -> "BenchmarkResources": + """Deserialize benchmark resources from cached configuration. + + Args: + config: Dictionary containing cached resource configuration + + Returns: + BenchmarkResources instance with database and kinds + """ return BenchmarkResources(database=config["database"], kinds=config["kinds"]) class Datastore(NoSQLStorage): + """Google Cloud Firestore/Datastore implementation for NoSQL storage. + + Provides NoSQL database functionality using Google Cloud Firestore in + Datastore mode. Manages database allocation, entity kind creation, and + data operations for benchmarks requiring NoSQL capabilities. + + Attributes: + _cli_instance: gcloud CLI interface for database management + _region: GCP region for database allocation + _benchmark_resources: Mapping of benchmarks to their database resources + """ @staticmethod def typename() -> str: + """Get the type name for this NoSQL storage implementation. + + Returns: + Type name string for GCP Datastore + """ return "GCP.Datastore" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this NoSQL storage implementation. + + Returns: + Deployment name string 'gcp' + """ return "gcp" def __init__( self, cli_instance: GCloudCLI, cache_client: Cache, resources: Resources, region: str - ): + ) -> None: + """Initialize Datastore NoSQL storage manager. + + Args: + cli_instance: gcloud CLI interface for database operations + cache_client: Cache instance for storing resource state + resources: Resource configuration + region: GCP region for database allocation + """ super().__init__(region, cache_client, resources) self._cli_instance = cli_instance self._region = region @@ -44,14 +114,30 @@ def __init__( # Mapping: benchmark -> Datastore database self._benchmark_resources: Dict[str, BenchmarkResources] = {} - """ - GCP requires no table mappings: the name of "kind" is the same as benchmark name. - """ - def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table name mappings for a benchmark. + + GCP Datastore requires no table mappings as the entity kind name + is the same as the benchmark table name. + + Args: + benchmark: Name of the benchmark + + Returns: + Empty dictionary (no mappings needed for GCP) + """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual table name for a benchmark table. + + Args: + benchmark: Name of the benchmark + table: Logical table name + + Returns: + Table name if it exists in benchmark resources, None otherwise + """ if benchmark not in self._benchmark_resources: return None @@ -62,6 +148,14 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve benchmark resources from cache. + + Args: + benchmark: Name of the benchmark to retrieve resources for + + Returns: + True if resources were found in cache, False otherwise + """ if benchmark in self._benchmark_resources: return True @@ -73,23 +167,48 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update cache with current benchmark resources. 
+ + Args: + benchmark: Name of the benchmark to cache resources for + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def benchmark_database(self, benchmark: str) -> str: + """Get the database name for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Database name for the benchmark's NoSQL resources + """ return self._benchmark_resources[benchmark].database def write_to_table( self, benchmark: str, table: str, - data: dict, + data: Dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): + ) -> None: + """Write data to a Datastore entity kind (table). + + Args: + benchmark: Name of the benchmark + table: Name of the table (entity kind) + data: Dictionary of data to write + primary_key: Primary key tuple (name, value) + secondary_key: Secondary key tuple (name, value) - required for GCP + + Raises: + AssertionError: If secondary_key is None (required for GCP) + """ res = self._benchmark_resources[benchmark] table_name = self._get_table_name(benchmark, table) @@ -117,6 +236,23 @@ def write_to_table( def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: + """Create a new entity kind (table) in Datastore. + + Creates a new Firestore database in Datastore mode if needed, and adds + the specified entity kind to the benchmark's resource configuration. + + Args: + benchmark: Name of the benchmark + name: Name of the entity kind (table) to create + primary_key: Primary key field name + _: Unused parameter for compatibility + + Returns: + Name of the created entity kind + + Raises: + RuntimeError: If database operations fail + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) @@ -174,7 +310,29 @@ def create_table( return name def clear_table(self, name: str) -> str: + """Clear all entities from a table. + + Args: + name: Name of the table to clear + + Returns: + Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table from the database. + + Args: + name: Name of the table to remove + + Returns: + Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 6736c1ca..242dbbbb 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -1,4 +1,20 @@ -from typing import cast, Optional +"""Google Cloud Platform function implementation for SeBS. + +This module provides the GCPFunction class that represents a Cloud Function +deployed on Google Cloud Platform. It handles function metadata, serialization, +deserialization, and bucket management for code deployment. + +Classes: + GCPFunction: Represents a deployed Google Cloud Function with GCP-specific features + +Example: + Creating a GCP function instance: + + config = FunctionConfig(memory=256, timeout=60, runtime="python39") + function = GCPFunction("my-function", "benchmark-name", "hash123", config) +""" + +from typing import cast, Dict, Optional from sebs.faas.config import Resources from sebs.faas.function import Function, FunctionConfig @@ -6,6 +22,14 @@ class GCPFunction(Function): + """Represents a Google Cloud Function with GCP-specific functionality. + + Extends the base Function class with GCP-specific features like bucket + management for code storage and GCP-specific serialization/deserialization. 
+ + Attributes: + bucket: Cloud Storage bucket name containing the function's code + """ def __init__( self, name: str, @@ -13,22 +37,55 @@ def __init__( code_package_hash: str, cfg: FunctionConfig, bucket: Optional[str] = None, - ): + ) -> None: + """Initialize a GCP Cloud Function instance. + + Args: + name: Function name on GCP + benchmark: Name of the benchmark this function implements + code_package_hash: Hash of the code package for version tracking + cfg: Function configuration (memory, timeout, etc.) + bucket: Optional Cloud Storage bucket name for code storage + """ super().__init__(benchmark, name, code_package_hash, cfg) self.bucket = bucket @staticmethod def typename() -> str: + """Get the type name for this function implementation. + + Returns: + Type name string for GCP functions + """ return "GCP.GCPFunction" - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize function to dictionary for cache storage. + + Returns: + Dictionary containing function state including bucket information + """ return { **super().serialize(), "bucket": self.bucket, } @staticmethod - def deserialize(cached_config: dict) -> "GCPFunction": + def deserialize(cached_config: Dict) -> "GCPFunction": + """Deserialize function from cached configuration. + + Reconstructs a GCPFunction instance from cached data including + triggers and configuration. Handles both Library and HTTP triggers. + + Args: + cached_config: Dictionary containing cached function configuration + + Returns: + Reconstructed GCPFunction instance with triggers + + Raises: + AssertionError: If an unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger @@ -49,7 +106,19 @@ def deserialize(cached_config: dict) -> "GCPFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: GCPStorage): + def code_bucket(self, benchmark: str, storage_client: GCPStorage) -> str: + """Get or create the Cloud Storage bucket for function code. + + Returns the bucket name where the function's code is stored, + creating a deployment bucket if none is assigned. + + Args: + benchmark: Benchmark name (unused but kept for compatibility) + storage_client: GCP storage client for bucket operations + + Returns: + Cloud Storage bucket name containing function code + """ if not self.bucket: self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 815ba36f..b6215169 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -1,3 +1,30 @@ +"""Google Cloud Platform (GCP) serverless system implementation. + +This module provides the main GCP implementation for the Serverless Benchmarking Suite, +including function deployment, management, monitoring, and resource allocation. +It integrates with Google Cloud Functions, Cloud Storage, Cloud Monitoring, and +Cloud Logging to provide comprehensive serverless benchmarking capabilities. 
+ +The module handles: +- Function creation, updating, and lifecycle management +- Code packaging and deployment to Cloud Functions +- HTTP and library trigger management +- Performance metrics collection via Cloud Monitoring +- Execution logs retrieval via Cloud Logging +- Cold start enforcement for benchmarking +- Storage bucket management for code deployment + +Classes: + GCP: Main system class implementing the FaaS System interface + +Example: + Basic GCP system initialization: + + config = GCPConfig(credentials, resources) + gcp_system = GCP(system_config, config, cache, docker_client, logging_handlers) + gcp_system.initialize() +""" + import docker import os import logging @@ -25,16 +52,21 @@ from sebs.gcp.function import GCPFunction from sebs.utils import LoggingHandlers -""" - This class provides basic abstractions for the FaaS system. - It provides the interface for initialization of the system and storage - services, creation and update of serverless functions and querying - logging and measurements services to obtain error messages and performance - measurements. -""" - class GCP(System): + """Google Cloud Platform serverless system implementation. + + Provides complete integration with Google Cloud Functions including deployment, + monitoring, logging, and resource management. Handles code packaging, function + lifecycle management, trigger creation, and performance metrics collection. + + Attributes: + _config: GCP-specific configuration including credentials and region + function_client: Google Cloud Functions API client + cold_start_counter: Counter for enforcing cold starts in benchmarking + logging_handlers: Logging configuration for status reporting + """ + def __init__( self, system_config: SeBSConfig, @@ -42,7 +74,16 @@ def __init__( cache_client: Cache, docker_client: docker.client, logging_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize GCP serverless system. + + Args: + system_config: General SeBS system configuration + config: GCP-specific configuration with credentials and settings + cache_client: Cache instance for storing function and resource state + docker_client: Docker client for container operations (if needed) + logging_handlers: Logging configuration for status reporting + """ super().__init__( system_config, cache_client, @@ -56,38 +97,77 @@ def __init__( @property def config(self) -> GCPConfig: + """Get the GCP configuration instance. + + Returns: + GCP configuration with credentials and region settings + """ return self._config @staticmethod - def name(): + def name() -> str: + """Get the platform name identifier. + + Returns: + Platform name string 'gcp' + """ return "gcp" @staticmethod - def typename(): + def typename() -> str: + """Get the platform type name for display. + + Returns: + Platform type string 'GCP' + """ return "GCP" @staticmethod def function_type() -> "Type[Function]": + """Get the function class type for this platform. + + Returns: + GCPFunction class type + """ return GCPFunction - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. - - :param config: systems-specific parameters - """ - - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None) -> None: + """Initialize the GCP system for function deployment and management. 
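# Illustrative sketch of the client setup performed by initialize(), assuming the
# google-api-python-client package is installed and application-default
# credentials are configured in the environment.
from googleapiclient.discovery import build

function_client = build("cloudfunctions", "v1", cache_discovery=False)
# The client is then used for API calls such as:
# function_client.projects().locations().functions().get(name=full_name).execute()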
+ + Sets up the Cloud Functions API client and initializes system resources + including storage buckets and other required infrastructure. + + Args: + config: Additional system-specific configuration parameters + resource_prefix: Optional prefix for resource naming to avoid conflicts + """ self.function_client = build("cloudfunctions", "v1", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) def get_function_client(self): + """Get the Google Cloud Functions API client. + + Returns: + Initialized Cloud Functions API client + """ return self.function_client def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate a default function name for the given benchmark. + + Creates a standardized function name using resource ID, benchmark name, + language, and version information. Formats the name according to GCP + Cloud Functions naming requirements. + + Args: + code_package: Benchmark package containing metadata + resources: Optional resource configuration for ID generation + + Returns: + Formatted function name suitable for GCP Cloud Functions + """ # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id func_name = "sebs-{}-{}-{}-{}".format( @@ -100,27 +180,24 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name according to GCP Cloud Functions requirements. + + Converts function names to comply with GCP naming rules by replacing + hyphens and dots with underscores. GCP functions must begin with a letter + and can only contain letters, numbers, and underscores. + + Args: + func_name: Raw function name to format + + Returns: + GCP-compliant function name + """ # GCP functions must begin with a letter # however, we now add by default `sebs` in the beginning func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - Apply the system-specific code packaging routine to build benchmark. - The benchmark creates a code directory with the following structure: - - [benchmark sources] - - [benchmark resources] - - [dependence specification], e.g. requirements.txt or package.json - - [handlers implementation for the language and deployment] - - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific - directory structure for Azure. - - :return: path to packaged code and its size - """ - def package_code( self, directory: str, @@ -131,6 +208,33 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package benchmark code for GCP Cloud Functions deployment. + + Transforms the benchmark code directory structure to meet GCP Cloud Functions + requirements. Creates a zip archive with the appropriate handler file naming + and directory structure for the specified language runtime. + + The packaging process: + 1. Creates a 'function' subdirectory for benchmark sources + 2. Renames handler files to GCP-required names (handler.py -> main.py) + 3. Creates a zip archive for deployment + 4. 
Restores original file structure + + Args: + directory: Path to the benchmark code directory + language_name: Programming language (python, nodejs) + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (x86_64, arm64) + benchmark: Benchmark name for archive naming + is_cached: Whether this package is from cache + container_deployment: Whether to use container deployment (unsupported) + + Returns: + Tuple of (archive_path, archive_size_bytes, container_uri) + + Raises: + NotImplementedError: If container_deployment is True + """ container_uri = "" @@ -190,6 +294,25 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "GCPFunction": + """Create a new GCP Cloud Function or update existing one. + + Deploys a benchmark as a Cloud Function, handling code upload to Cloud Storage, + function creation with proper configuration, and IAM policy setup for + unauthenticated invocations. If the function already exists, updates it instead. + + Args: + code_package: Benchmark package with code and configuration + func_name: Name for the Cloud Function + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container image URI (unused for GCP) + + Returns: + GCPFunction instance representing the deployed function + + Raises: + NotImplementedError: If container_deployment is True + RuntimeError: If function creation or IAM configuration fails + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") @@ -311,6 +434,21 @@ def create_function( return function def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for the given function. + + Creates HTTP triggers for Cloud Functions, waiting for function deployment + to complete before extracting the trigger URL. + + Args: + function: Function instance to create trigger for + trigger_type: Type of trigger to create (only HTTP supported) + + Returns: + Created trigger instance with URL and configuration + + Raises: + RuntimeError: If trigger type is not supported + """ from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: @@ -341,7 +479,15 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.cache_client.update_function(function) return trigger - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Configure a cached function instance for use. + + Sets up library triggers for functions loaded from cache, ensuring + they have the proper deployment client and logging configuration. + + Args: + function: Cached function instance to configure + """ from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger @@ -357,7 +503,23 @@ def update_function( code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update an existing Cloud Function with new code and configuration. + + Uploads new code package to Cloud Storage and patches the existing function + with updated runtime, memory, timeout, and environment variables. Waits + for deployment to complete before returning. 
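# Runnable sketch of the name-formatting rule described above for GCP Cloud
# Functions: names must begin with a letter and may contain only letters, digits
# and underscores, so hyphens and dots are replaced. The sample name is made up.
def format_function_name(func_name: str) -> str:
    return func_name.replace("-", "_").replace(".", "_")

print(format_function_name("sebs-abc123-110.dynamic-html-python-3.8"))
# -> sebs_abc123_110_dynamic_html_python_3_8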
+ + Args: + function: Existing function instance to update + code_package: New benchmark package with updated code + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container image URI (unused) + + Raises: + NotImplementedError: If container_deployment is True + RuntimeError: If function update fails after maximum retries + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") @@ -418,7 +580,19 @@ def update_function( ) self.logging.info("Published new function code and configuration.") - def _update_envs(self, full_function_name: str, envs: dict) -> dict: + def _update_envs(self, full_function_name: str, envs: Dict) -> Dict: + """Merge new environment variables with existing function environment. + + Retrieves current function environment variables and merges them with + new variables, with new variables taking precedence on conflicts. + + Args: + full_function_name: Fully qualified function name + envs: New environment variables to add/update + + Returns: + Merged environment variables dictionary + """ get_req = ( self.function_client.projects().locations().functions().get(name=full_function_name) @@ -432,7 +606,18 @@ def _update_envs(self, full_function_name: str, envs: dict) -> dict: return envs - def _generate_function_envs(self, code_package: Benchmark) -> dict: + def _generate_function_envs(self, code_package: Benchmark) -> Dict: + """Generate environment variables for function based on benchmark requirements. + + Creates environment variables needed by the benchmark, such as NoSQL + database connection information. + + Args: + code_package: Benchmark package with module requirements + + Returns: + Dictionary of environment variables for the function + """ envs = {} if code_package.uses_nosql: @@ -447,8 +632,24 @@ def _generate_function_envs(self, code_package: Benchmark) -> dict: return envs def update_function_configuration( - self, function: Function, code_package: Benchmark, env_variables: dict = {} - ): + self, function: Function, code_package: Benchmark, env_variables: Dict = {} + ) -> int: + """Update function configuration including memory, timeout, and environment. + + Updates the Cloud Function's memory allocation, timeout, and environment + variables without changing the code. Waits for deployment to complete. + + Args: + function: Function instance to update + code_package: Benchmark package with configuration requirements + env_variables: Additional environment variables to set + + Returns: + Version ID of the updated function + + Raises: + RuntimeError: If configuration update fails after maximum retries + """ assert code_package.has_input_processed @@ -520,22 +721,59 @@ def update_function_configuration( return versionId @staticmethod - def get_full_function_name(project_name: str, location: str, func_name: str): + def get_full_function_name(project_name: str, location: str, func_name: str) -> str: + """Generate the fully qualified function name for GCP API calls. + + Args: + project_name: GCP project ID + location: GCP region/location + func_name: Function name + + Returns: + Fully qualified function name in GCP format + """ return f"projects/{project_name}/locations/{location}/functions/{func_name}" - def prepare_experiment(self, benchmark): + def prepare_experiment(self, benchmark: str) -> str: + """Prepare storage resources for benchmark experiment. + + Creates a dedicated storage bucket for experiment logs and outputs. 
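# Illustrative sketch of the merge semantics described for _update_envs() above:
# existing function environment variables are preserved and entries from the new
# mapping win on key conflicts. Variable names and values here are made up.
from typing import Dict

def merge_envs(existing: Dict[str, str], new: Dict[str, str]) -> Dict[str, str]:
    return {**existing, **new}

current = {"NOSQL_STORAGE_DATABASE": "sebs-db", "cold_start": "1"}
print(merge_envs(current, {"cold_start": "2"}))  # cold_start becomes "2"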
+ + Args: + benchmark: Name of the benchmark being prepared + + Returns: + Name of the created logs storage bucket + """ logs_bucket = self._system_resources.get_storage().add_output_bucket( benchmark, suffix="logs" ) return logs_bucket def shutdown(self) -> None: + """Shutdown the GCP system and clean up resources. + + Performs cleanup of system resources and calls parent shutdown method. + """ cast(GCPSystemResources, self._system_resources).shutdown() super().shutdown() def download_metrics( - self, function_name: str, start_time: int, end_time: int, requests: dict, metrics: dict - ): + self, function_name: str, start_time: int, end_time: int, requests: Dict, metrics: Dict + ) -> None: + """Download execution metrics and logs from GCP monitoring services. + + Retrieves function execution times from Cloud Logging and performance + metrics from Cloud Monitoring. Processes logs to extract execution times + and collects metrics like memory usage and network egress. + + Args: + function_name: Name of the function to collect metrics for + start_time: Start timestamp for metric collection (Unix timestamp) + end_time: End timestamp for metric collection (Unix timestamp) + requests: Dictionary of requests keyed by execution ID + metrics: Dictionary to populate with collected metrics + """ from google.api_core import exceptions from time import sleep @@ -651,7 +889,19 @@ def wrapper(gen): } ] - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> int: + """Force a cold start by updating function configuration. + + Triggers a cold start by updating the function's environment variables + with a unique counter value, forcing GCP to create a new instance. + + Args: + function: Function instance to enforce cold start on + code_package: Benchmark package for configuration + + Returns: + Version ID of the updated function + """ self.cold_start_counter += 1 new_version = self.update_function_configuration( @@ -660,7 +910,16 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark): return new_version - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold starts for multiple functions simultaneously. + + Updates all provided functions to force cold starts and waits for + all deployments to complete before returning. + + Args: + functions: List of functions to enforce cold starts on + code_package: Benchmark package for configuration + """ new_versions = [] for func in functions: @@ -687,6 +946,18 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.cold_start_counter += 1 def get_functions(self, code_package: Benchmark, function_names: List[str]) -> List["Function"]: + """Retrieve multiple function instances and ensure they are deployed. + + Gets function instances for the provided names and waits for all + functions to be in ACTIVE deployment state. 
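# Illustrative sketch of the cold-start enforcement described above: a counter is
# incremented and pushed into the function's environment so the platform has to
# redeploy and start fresh instances. The environment variable name below is a
# placeholder, not necessarily the one SeBS uses.
from typing import Dict

cold_start_counter = 0

def enforce_cold_start(current_envs: Dict[str, str]) -> Dict[str, str]:
    global cold_start_counter
    cold_start_counter += 1
    return {**current_envs, "cold_start_counter": str(cold_start_counter)}

envs = enforce_cold_start({"NOSQL_STORAGE_DATABASE": "sebs-db"})
print(envs["cold_start_counter"])  # "1"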
+ + Args: + code_package: Benchmark package for function creation + function_names: List of function names to retrieve + + Returns: + List of deployed function instances + """ functions: List["Function"] = [] undeployed_functions_before = [] @@ -716,6 +987,15 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: + """Check if a function is deployed and optionally verify version. + + Args: + func_name: Name of the function to check + versionId: Optional specific version ID to verify (-1 to check any) + + Returns: + Tuple of (is_deployed, current_version_id) + """ name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) @@ -726,22 +1006,32 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: return (status_res["versionId"] == versionId, status_res["versionId"]) def deployment_version(self, func: Function) -> int: + """Get the current deployment version ID of a function. + + Args: + func: Function instance to check + + Returns: + Current version ID of the function + """ name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() return int(status_res["versionId"]) - """ - Helper method for recursive_zip - - :param base_directory: path to directory to be zipped - :param path: path to file of subdirectory to be zipped - :param archive: ZipFile object - """ - @staticmethod - def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): + def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile) -> None: + """Recursively add files and directories to a zip archive. + + Helper method for recursive_zip that handles directory traversal + and adds files with relative paths to the archive. + + Args: + base_directory: Base directory path for relative path calculation + path: Current path being processed (file or directory) + archive: ZipFile object to add files to + """ paths = os.listdir(path) for p in paths: directory = os.path.join(path, p) @@ -751,19 +1041,20 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): if directory != archive.filename: # prevent form including itself archive.write(directory, os.path.relpath(directory, base_directory)) - """ - https://gist.github.com/felixSchl/d38b455df8bf83a78d3d - - Zip directory with relative paths given an absolute path - If the archive exists only new files are added and updated. - If the archive does not exist a new one is created. - - :param path: absolute path to the directory to be zipped - :param archname: path to the zip file - """ - @staticmethod - def recursive_zip(directory: str, archname: str): + def recursive_zip(directory: str, archname: str) -> bool: + """Create a zip archive of a directory with relative paths. + + Creates a compressed zip archive of the specified directory, preserving + the relative directory structure. Uses maximum compression level. 
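# Self-contained sketch of the recursive-zip behaviour described above: walk the
# directory tree, store every file under its path relative to the directory root,
# and use DEFLATE with the maximum compression level.
import os
import zipfile

def recursive_zip_sketch(directory: str, archname: str) -> bool:
    with zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) as archive:
        for root, _, files in os.walk(directory):
            for fname in files:
                full_path = os.path.join(root, fname)
                # Avoid adding the archive to itself if it lives inside the directory.
                if os.path.abspath(full_path) != os.path.abspath(archname):
                    archive.write(full_path, os.path.relpath(full_path, directory))
    return True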
+ + Args: + directory: Absolute path to the directory to be zipped + archname: Path where the zip file should be created + + Returns: + True if archiving was successful + """ archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) if os.path.isdir(directory): GCP.helper_zip(directory, directory, archive) diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index 0a7d5c14..e9d85ac1 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -1,3 +1,21 @@ +"""System resource management for Google Cloud Platform. + +This module provides the GCPSystemResources class that manages all GCP resources +required for serverless benchmarking, including storage, NoSQL databases, and +CLI tools. It coordinates resource allocation and provides unified access to +GCP services. + +Classes: + GCPSystemResources: Main resource manager for GCP services + +Example: + Creating and using GCP system resources: + + resources = GCPSystemResources(system_config, gcp_config, cache, docker_client, handlers) + storage = resources.get_storage(replace_existing=False) + datastore = resources.get_nosql_storage() +""" + from typing import cast, Optional from sebs.config import SeBSConfig @@ -13,12 +31,35 @@ class GCPSystemResources(SystemResources): + """System resource manager for Google Cloud Platform services. + + Manages and provides access to all GCP services required for serverless + benchmarking including Cloud Storage, Firestore/Datastore, and gcloud CLI. + Handles resource initialization, configuration, and cleanup. + + Attributes: + _storage: Cloud Storage instance for object storage + _nosql_storage: Datastore instance for NoSQL operations + _cli_instance: gcloud CLI interface for administrative operations + _system_config: SeBS system configuration + _logging_handlers: Logging configuration + """ @staticmethod def typename() -> str: + """Get the type name for this resource manager. + + Returns: + Type name string for GCP system resources + """ return "GCP.SystemResources" @property def config(self) -> GCPConfig: + """Get the GCP configuration instance. + + Returns: + GCP configuration with credentials and settings + """ return cast(GCPConfig, self._config) def __init__( @@ -28,7 +69,16 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize GCP system resources manager. + + Args: + system_config: SeBS system configuration + config: GCP-specific configuration + cache_client: Cache instance for resource state + docker_client: Docker client for containerized operations + logger_handlers: Logging configuration + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers @@ -37,15 +87,19 @@ def __init__( self._cli_instance: Optional[GCloudCLI] = None self._system_config = system_config - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. - - :param replace_existing: replace benchmark input data if exists already - """ - def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: + """Get or create the Cloud Storage instance. + + Provides access to Google Cloud Storage for persistent object storage. + Creates the storage instance if it doesn't exist, or updates the + replace_existing setting if provided. 
+ + Args: + replace_existing: Whether to replace existing benchmark input data + + Returns: + Initialized GCP storage instance + """ if not self._storage: self._storage = GCPStorage( self.config.region, @@ -59,6 +113,14 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: return self._storage def get_nosql_storage(self) -> Datastore: + """Get or create the Datastore instance for NoSQL operations. + + Provides access to Google Cloud Firestore/Datastore for NoSQL + database operations required by benchmarks. + + Returns: + Initialized Datastore instance + """ if not self._nosql_storage: self._nosql_storage = Datastore( self.cli_instance, self._cache_client, self.config.resources, self.config.region @@ -67,6 +129,14 @@ def get_nosql_storage(self) -> Datastore: @property def cli_instance(self) -> GCloudCLI: + """Get or create the gcloud CLI instance. + + Provides access to a containerized gcloud CLI for administrative + operations. Initializes and authenticates the CLI if needed. + + Returns: + Authenticated gcloud CLI instance + """ if self._cli_instance is None: self._cli_instance = GCloudCLI( self.config.credentials, self._system_config, self._docker_client @@ -76,10 +146,21 @@ def cli_instance(self) -> GCloudCLI: self._cli_instance.login(self.config.credentials.project_name) return self._cli_instance - def initialize_cli(self, cli: GCloudCLI): + def initialize_cli(self, cli: GCloudCLI) -> None: + """Initialize with an existing CLI instance. + + Uses a pre-configured CLI instance instead of creating a new one. + + Args: + cli: Pre-configured gcloud CLI instance + """ self._cli_instance = cli self._cli_instance_stop = False def shutdown(self) -> None: + """Shutdown system resources and clean up. + + Stops the gcloud CLI container if it was created by this instance. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index c578966f..772c0491 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -1,3 +1,20 @@ +"""Google Cloud Storage implementation for SeBS. + +This module provides the GCPStorage class that implements object storage operations +using Google Cloud Storage. It handles bucket management, file uploads/downloads, +and storage resource allocation for benchmarks and deployment artifacts. + +Classes: + GCPStorage: Google Cloud Storage implementation with bucket and blob management + +Example: + Using GCP storage for benchmark files: + + storage = GCPStorage(region, cache, resources, replace_existing=False) + bucket = storage.add_benchmark_bucket("my-benchmark") + storage.upload(bucket, "/path/to/file.zip", "benchmark-code.zip") +""" + import logging import os import uuid @@ -12,12 +29,32 @@ class GCPStorage(PersistentStorage): + """Google Cloud Storage implementation for SeBS persistent storage. + + Provides object storage capabilities using Google Cloud Storage buckets. + Handles bucket creation, file operations, and storage resource management + for benchmarks, deployment artifacts, and experiment outputs. + + Attributes: + client: Google Cloud Storage client instance + cached: Whether storage operations use cached data + """ @staticmethod def typename() -> str: + """Get the type name for this storage implementation. + + Returns: + Type name string for GCP storage + """ return "GCP.GCPStorage" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage implementation. 
+ + Returns: + Deployment name string 'gcp' + """ return "gcp" @property @@ -30,16 +67,42 @@ def replace_existing(self, val: bool): def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool - ): + ) -> None: + """Initialize GCP Storage client. + + Args: + region: GCP region for storage resources + cache_client: Cache instance for storing storage state + resources: Resource configuration + replace_existing: Whether to replace existing files during uploads + """ super().__init__(region, cache_client, resources, replace_existing) self.replace_existing = replace_existing self.client = gcp_storage.Client() self.cached = False def correct_name(self, name: str) -> str: + """Correct bucket name to meet GCP naming requirements. + + Args: + name: Original bucket name + + Returns: + Corrected bucket name (no changes needed for GCP) + """ return name - def _create_bucket(self, name, buckets: List[str] = [], randomize_name: bool = False): + def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False) -> str: + """Create a new Cloud Storage bucket or return existing one. + + Args: + name: Base name for the bucket + buckets: List of existing bucket names to check + randomize_name: Whether to append random suffix to avoid name conflicts + + Returns: + Name of the created or existing bucket + """ found_bucket = False for bucket_name in buckets: if name in bucket_name: @@ -62,12 +125,26 @@ def _create_bucket(self, name, buckets: List[str] = [], randomize_name: bool = F return bucket_name def download(self, bucket_name: str, key: str, filepath: str) -> None: + """Download a file from Cloud Storage. + + Args: + bucket_name: Name of the storage bucket + key: Object key/path in the bucket + filepath: Local file path to save the downloaded file + """ logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key) blob.download_to_filename(filepath) - def upload(self, bucket_name: str, filepath: str, key: str): + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to Cloud Storage. + + Args: + bucket_name: Name of the storage bucket + filepath: Local file path to upload + key: Object key/path in the bucket for the uploaded file + """ logging.info("Upload {} to {}".format(filepath, bucket_name)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) @@ -75,6 +152,14 @@ def upload(self, bucket_name: str, filepath: str, key: str): blob.upload_from_filename(filepath) def exists_bucket(self, bucket_name: str) -> bool: + """Check if a Cloud Storage bucket exists. + + Args: + bucket_name: Name of the bucket to check + + Returns: + True if bucket exists and is accessible, False otherwise + """ try: return self.client.bucket(bucket_name).exists() # 403 returned when the bucket exists but is owned by another user @@ -82,12 +167,29 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """List objects in a Cloud Storage bucket with optional prefix filter. 
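# Illustrative sketch of the listing described above, assuming the
# google-cloud-storage package is installed and default credentials are set. As
# in the method above, the filter is a substring test on the object name rather
# than a strict prefix match. The bucket name below is a placeholder.
from typing import List
from google.cloud import storage as gcp_storage

def list_bucket_sketch(client: gcp_storage.Client, bucket_name: str, prefix: str = "") -> List[str]:
    return [blob.name for blob in client.list_blobs(bucket_name) if prefix in blob.name]

# client = gcp_storage.Client()
# print(list_bucket_sketch(client, "sebs-benchmarks-bucket", prefix="code/"))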
+ + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List of object names in the bucket matching the prefix + """ bucket_instance = self.client.get_bucket(bucket_name) all_blobs = list(self.client.list_blobs(bucket_instance)) blobs = [blob.name for blob in all_blobs if prefix in blob.name] return blobs def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List Cloud Storage buckets, optionally filtered by name. + + Args: + bucket_name: Optional bucket name filter + + Returns: + List of bucket names, filtered if bucket_name is provided + """ all_buckets = list(self.client.list_buckets()) if bucket_name is not None: buckets = [bucket.name for bucket in all_buckets if bucket_name in bucket.name] @@ -95,13 +197,36 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: buckets = [bucket.name for bucket in all_buckets] return buckets - def remove_bucket(self, bucket_name: str): + def remove_bucket(self, bucket_name: str) -> None: + """Remove a Cloud Storage bucket. + + Args: + bucket_name: Name of the bucket to remove + """ self.client.get_bucket(bucket_name).delete() - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Clean all objects from a Cloud Storage bucket. + + Args: + bucket: Name of the bucket to clean + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: + """Upload function for batch operations with caching support. + + Uploads a file to the appropriate benchmark bucket, respecting cache + settings and replace_existing configuration. + + Args: + path_idx: Index of the input path prefix + key: Object key for the uploaded file + filepath: Local file path to upload + """ if self.cached and not self.replace_existing: return diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..ac0fd2c7 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -1,3 +1,25 @@ +"""Trigger implementations for Google Cloud Functions. + +This module provides trigger classes for invoking Cloud Functions through different +mechanisms including direct library calls and HTTP requests. Supports both +synchronous and asynchronous invocation patterns. + +Classes: + LibraryTrigger: Direct Cloud Functions API invocation trigger + HTTPTrigger: HTTP endpoint invocation trigger + +Example: + Using a library trigger for direct invocation: + + trigger = LibraryTrigger("my-function", gcp_client) + result = trigger.sync_invoke({"input": "data"}) + + Using an HTTP trigger: + + trigger = HTTPTrigger("https://region-project.cloudfunctions.net/my-function") + result = trigger.sync_invoke({"input": "data"}) +""" + import concurrent.futures import datetime import json @@ -9,29 +31,79 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + """Direct Cloud Functions API trigger for synchronous invocation. + + Uses the Google Cloud Functions API to invoke functions directly through + the cloud functions client. Provides precise execution timing and error + handling. Waits for function deployment before invocation. + + Attributes: + name: Function name to invoke + _deployment_client: GCP client for API operations + """ + def __init__(self, fname: str, deployment_client: Optional[GCP] = None) -> None: + """Initialize library trigger for direct function invocation. 
+ + Args: + fname: Name of the Cloud Function to invoke + deployment_client: Optional GCP client for API operations + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Get the type name for this trigger implementation. + + Returns: + Type name string for library triggers + """ return "GCP.LibraryTrigger" @property def deployment_client(self) -> GCP: + """Get the GCP deployment client. + + Returns: + GCP client instance for API operations + + Raises: + AssertionError: If deployment client is not set + """ assert self._deployment_client return self._deployment_client @deployment_client.setter - def deployment_client(self, deployment_client: GCP): + def deployment_client(self, deployment_client: GCP) -> None: + """Set the GCP deployment client. + + Args: + deployment_client: GCP client instance + """ self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type for this implementation. + + Returns: + Library trigger type enum value + """ return Trigger.TriggerType.LIBRARY - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict) -> ExecutionResult: + """Synchronously invoke the Cloud Function using the API. + + Waits for function deployment, then invokes via Cloud Functions API. + Measures execution time and handles errors. + + Args: + payload: Input data to send to the function + + Returns: + ExecutionResult with timing, output, and error information + """ self.logging.info(f"Invoke function {self.name}") @@ -71,43 +143,116 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: gcp_result.parse_benchmark_output(output) return gcp_result - def async_invoke(self, payload: dict): + def async_invoke(self, payload: Dict): + """Asynchronously invoke the Cloud Function. + + Args: + payload: Input data to send to the function + + Raises: + NotImplementedError: Async invocation not implemented for library triggers + """ raise NotImplementedError() - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize trigger to dictionary for cache storage. + + Returns: + Dictionary containing trigger type and name + """ return {"type": "Library", "name": self.name} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict) -> Trigger: + """Deserialize trigger from cached configuration. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + Reconstructed LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): - def __init__(self, url: str): + """HTTP endpoint trigger for Cloud Functions invocation. + + Invokes Cloud Functions through their HTTP endpoints, supporting both + synchronous and asynchronous execution patterns using HTTP requests. + + Attributes: + url: HTTP endpoint URL for the Cloud Function + """ + def __init__(self, url: str) -> None: + """Initialize HTTP trigger with function endpoint URL. + + Args: + url: HTTP endpoint URL for the Cloud Function + """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Get the type name for this trigger implementation. + + Returns: + Type name string for HTTP triggers + """ return "GCP.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type for this implementation. 
+ + Returns: + HTTP trigger type enum value + """ return Trigger.TriggerType.HTTP - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict) -> ExecutionResult: + """Synchronously invoke the Cloud Function via HTTP. + + Args: + payload: Input data to send to the function + + Returns: + ExecutionResult from the HTTP invocation + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict) -> concurrent.futures.Future: + """Asynchronously invoke the Cloud Function via HTTP. + + Args: + payload: Input data to send to the function + + Returns: + Future object for the async HTTP invocation + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize trigger to dictionary for cache storage. + + Returns: + Dictionary containing trigger type and URL + """ return {"type": "HTTP", "url": self.url} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict) -> Trigger: + """Deserialize trigger from cached configuration. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + Reconstructed HTTPTrigger instance + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/local/__init__.py b/sebs/local/__init__.py index caded0a6..4e71f98a 100644 --- a/sebs/local/__init__.py +++ b/sebs/local/__init__.py @@ -1,3 +1,19 @@ +"""SeBS local execution platform module. + +This module provides the local execution platform for the Serverless Benchmarking Suite. +It enables running serverless functions locally using Docker containers, providing a +development and testing environment that mimics serverless execution without requiring +cloud platform deployment. + +Key components: +- Local: Main system class for local function execution +- LocalFunction: Represents a function deployed locally in a Docker container +- Deployment: Manages deployments and memory measurements for local functions + +The local platform supports HTTP triggers and provides memory profiling capabilities +for performance analysis. +""" + from .local import Local # noqa from .function import LocalFunction # noqa from .deployment import Deployment # noqa diff --git a/sebs/local/config.py b/sebs/local/config.py index 0b512c67..78a54074 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -1,3 +1,16 @@ +"""Configuration classes for the local execution platform. + +This module provides configuration classes for the SeBS local execution platform, +including credentials, resources, and overall configuration management. The local +platform requires minimal configuration since it runs functions locally using +Docker containers. + +Classes: + LocalCredentials: Empty credentials class for local execution + LocalResources: Resource management for local deployments + LocalConfig: Main configuration class for local platform +""" + from typing import cast, Optional, Set from sebs.cache import Cache @@ -8,11 +21,33 @@ class LocalCredentials(Credentials): + """Credentials class for local execution platform. + + The local platform doesn't require any authentication credentials since + functions run locally using Docker containers. This class provides the + required interface with empty implementations. + """ + def serialize(self) -> dict: + """Serialize credentials to dictionary. 
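# Self-contained sketch of the async-over-sync pattern used by the HTTP trigger:
# the synchronous invocation is submitted to a thread pool and a Future is handed
# back to the caller. The sync_invoke function below is a stand-in for the real
# HTTP round trip.
import concurrent.futures
import time
from typing import Dict

def sync_invoke(payload: Dict) -> Dict:
    time.sleep(0.1)  # stand-in for the HTTP request to the function endpoint
    return {"result": payload}

pool = concurrent.futures.ThreadPoolExecutor()
future = pool.submit(sync_invoke, {"input": "data"})
print(future.result())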
+ + Returns: + dict: Empty dictionary as no credentials are required for local execution + """ return {} @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Deserialize credentials from configuration. + + Args: + config: Configuration dictionary (unused for local) + cache: Cache client (unused for local) + handlers: Logging handlers (unused for local) + + Returns: + LocalCredentials: New instance of local credentials + """ return LocalCredentials() @@ -23,34 +58,71 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden class LocalResources(SelfHostedResources): + """Resource management for local execution platform. + + Manages resources for local function execution, including port allocation + for Docker containers and storage configurations. Tracks allocated ports + to avoid conflicts when running multiple functions. + + Attributes: + _path: Path for local resource storage + _allocated_ports: Set of ports currently allocated to containers + """ + def __init__( self, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): + """Initialize local resources. + + Args: + storage_cfg: Optional persistent storage configuration + nosql_storage_cfg: Optional NoSQL storage configuration + """ self._path: str = "" super().__init__("local", storage_cfg, nosql_storage_cfg) self._allocated_ports: Set[int] = set() @property def allocated_ports(self) -> set: + """Get the set of allocated ports. + + Returns: + set: Set of port numbers currently allocated to containers + """ return self._allocated_ports def serialize(self) -> dict: + """Serialize resources to dictionary. + + Returns: + dict: Dictionary containing resource configuration including allocated ports + """ out = super().serialize() out["allocated_ports"] = list(self._allocated_ports) return out @staticmethod - def initialize(res: Resources, config: dict): - + def initialize(res: Resources, config: dict) -> None: + """Initialize resources from configuration. + + Args: + res: Resources instance to initialize + config: Configuration dictionary containing resource settings + """ resources = cast(LocalResources, res) if "allocated_ports" in config: resources._allocated_ports = set(config["allocated_ports"]) - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current resource state. + + Args: + cache: Cache client to update + """ super().update_cache(cache) cache.update_config( val=list(self._allocated_ports), keys=["local", "resources", "allocated_ports"] @@ -58,6 +130,16 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Deserialize resources from configuration. + + Args: + config: Configuration dictionary + cache: Cache client for loading cached resources + handlers: Logging handlers for resource logging + + Returns: + LocalResources: Initialized local resources instance + """ ret = LocalResources() cached_config = cache.get_config("local") @@ -77,34 +159,84 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class LocalConfig(Config): + """Configuration class for local execution platform. + + Provides the main configuration interface for the local platform, + combining credentials and resources. The local platform requires + minimal configuration since it runs functions locally. 
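# Illustrative sketch of the port bookkeeping described for LocalResources above:
# allocated ports live in a set, are converted to a list for the JSON cache, and
# are restored into a set when the cache is read back.
import json
from typing import Set

allocated_ports: Set[int] = {9000, 9001, 9002}
cached = json.dumps({"allocated_ports": sorted(allocated_ports)})
restored: Set[int] = set(json.loads(cached)["allocated_ports"])
assert restored == allocated_ports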
+ + Attributes: + _credentials: Local credentials instance (empty) + _resources: Local resources instance for port management + """ + def __init__(self): + """Initialize local configuration.""" super().__init__(name="local") self._credentials = LocalCredentials() self._resources = LocalResources() @staticmethod def typename() -> str: + """Get the type name for this configuration. + + Returns: + str: Type name "Local.Config" + """ return "Local.Config" @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize configuration from dictionary. + + Args: + cfg: Configuration instance to initialize + dct: Dictionary containing configuration data + + Note: + No initialization needed for local platform + """ pass @property def credentials(self) -> LocalCredentials: + """Get the local credentials. + + Returns: + LocalCredentials: The credentials instance + """ return self._credentials @property def resources(self) -> LocalResources: + """Get the local resources. + + Returns: + LocalResources: The resources instance + """ return self._resources @resources.setter - def resources(self, val: LocalResources): + def resources(self, val: LocalResources) -> None: + """Set the local resources. + + Args: + val: New resources instance + """ self._resources = val @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: - + """Deserialize configuration from dictionary. + + Args: + config: Configuration dictionary + cache: Cache client for loading cached configuration + handlers: Logging handlers for configuration logging + + Returns: + LocalConfig: Initialized local configuration instance + """ config_obj = LocalConfig() config_obj.resources = cast( LocalResources, LocalResources.deserialize(config, cache, handlers) @@ -113,8 +245,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config return config_obj def serialize(self) -> dict: + """Serialize configuration to dictionary. + + Returns: + dict: Dictionary containing configuration data + """ out = {"name": "local", "region": self._region, "resources": self._resources.serialize()} return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current configuration. + + Args: + cache: Cache client to update + """ self.resources.update_cache(cache) diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index 85f7df8e..92ede889 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -1,3 +1,20 @@ +"""Deployment management for local execution platform. + +This module provides the Deployment class for managing local function deployments, +including memory measurement collection, function lifecycle management, and +resource cleanup. + +The Deployment class handles: +- Function container management +- Memory measurement process coordination +- Input/output serialization +- Storage configuration +- Resource cleanup and shutdown + +Classes: + Deployment: Main deployment management class for local functions +""" + import json import os from signal import SIGKILL @@ -12,15 +29,40 @@ class Deployment(LoggingBase): + """Manages local function deployments and memory measurements. + + Coordinates the lifecycle of locally deployed functions, including container + management, memory measurement collection, and resource cleanup. Handles + serialization of deployment state for persistence and recovery. 
+ + Attributes: + _functions: List of deployed local functions + _storage: Optional Minio storage instance + _inputs: List of function input configurations + _memory_measurement_pids: PIDs of memory measurement processes + _measurement_file: Path to memory measurement output file + """ + @property def measurement_file(self) -> Optional[str]: + """Get the path to the memory measurement file. + + Returns: + Optional[str]: Path to measurement file, or None if not set + """ return self._measurement_file @measurement_file.setter - def measurement_file(self, val: Optional[str]): + def measurement_file(self, val: Optional[str]) -> None: + """Set the path to the memory measurement file. + + Args: + val: Path to measurement file, or None to unset + """ self._measurement_file = val def __init__(self): + """Initialize a new deployment.""" super().__init__() self._functions: List[LocalFunction] = [] self._storage: Optional[Minio] @@ -28,18 +70,38 @@ def __init__(self): self._memory_measurement_pids: List[int] = [] self._measurement_file: Optional[str] = None - def add_function(self, func: LocalFunction): + def add_function(self, func: LocalFunction) -> None: + """Add a function to the deployment. + + Args: + func: Local function to add to the deployment + """ self._functions.append(func) if func.memory_measurement_pid is not None: self._memory_measurement_pids.append(func.memory_measurement_pid) - def add_input(self, func_input: dict): + def add_input(self, func_input: dict) -> None: + """Add function input configuration to the deployment. + + Args: + func_input: Dictionary containing function input configuration + """ self._inputs.append(func_input) - def set_storage(self, storage: Minio): + def set_storage(self, storage: Minio) -> None: + """Set the storage instance for the deployment. + + Args: + storage: Minio storage instance to use + """ self._storage = storage - def serialize(self, path: str): + def serialize(self, path: str) -> None: + """Serialize deployment configuration to file. + + Args: + path: File path to write serialized deployment configuration + """ with open(path, "w") as out: config: dict = { "functions": self._functions, @@ -55,9 +117,20 @@ def serialize(self, path: str): out.write(serialize(config)) - # FIXME: do we still use it? @staticmethod def deserialize(path: str, cache_client: Cache) -> "Deployment": + """Deserialize deployment configuration from file. + + Args: + path: File path to read serialized deployment configuration + cache_client: Cache client for loading cached resources + + Returns: + Deployment: Deserialized deployment instance + + Note: + This method may be deprecated - check if still in use + """ with open(path, "r") as in_f: input_data = json.load(in_f) deployment = Deployment() @@ -73,8 +146,16 @@ def deserialize(path: str, cache_client: Cache) -> "Deployment": ) return deployment - def shutdown(self, output_json: str): - + def shutdown(self, output_json: str) -> None: + """Shutdown the deployment and collect memory measurements. + + Terminates all memory measurement processes, processes measurement data, + and stops all function containers. Memory measurements are aggregated + and written to the specified output file. 
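# Illustrative sketch (POSIX only) of the shutdown step described above: kill the
# recorded measurement processes and write aggregated results to the requested
# JSON file. The PID list, output path, and output content are placeholders.
import json
import os
from signal import SIGKILL

measurement_pids = []  # would contain the PIDs of memory-measurement processes
for pid in measurement_pids:
    os.kill(pid, SIGKILL)

with open("memory-measurements.json", "w") as out:
    json.dump({"measurements": []}, out, indent=2)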
+ + Args: + output_json: Path to write memory measurement results + """ if len(self._memory_measurement_pids) > 0: self.logging.info("Killing memory measurement processes") diff --git a/sebs/local/function.py b/sebs/local/function.py index f0104a4e..838773ae 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -1,3 +1,14 @@ +"""Function and trigger implementations for local execution platform. + +This module provides classes for managing functions and triggers in the local +execution environment. Functions run as Docker containers with HTTP triggers +for invocation. + +Classes: + HTTPTrigger: HTTP-based trigger for local function invocation + LocalFunction: Represents a function deployed locally in a Docker container +""" + import concurrent.futures import docker import json @@ -8,36 +19,103 @@ class HTTPTrigger(Trigger): + """HTTP trigger for local function invocation. + + Provides HTTP-based triggering for functions running in local Docker containers. + Supports both synchronous and asynchronous invocation patterns. + + Attributes: + url: HTTP URL endpoint for function invocation + """ + def __init__(self, url: str): + """Initialize HTTP trigger. + + Args: + url: HTTP URL endpoint for the function + """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name "Local.HTTPTrigger" + """ return "Local.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + Trigger.TriggerType: HTTP trigger type + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """Synchronously invoke the function via HTTP. + + Args: + payload: Function input payload as dictionary + + Returns: + ExecutionResult: Result of the function execution + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke the function via HTTP. + + Args: + payload: Function input payload as dictionary + + Returns: + concurrent.futures.Future: Future object for the execution result + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize trigger configuration to dictionary. + + Returns: + dict: Dictionary containing trigger type and URL + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize trigger from dictionary. + + Args: + obj: Dictionary containing trigger configuration + + Returns: + HTTPTrigger: Deserialized HTTP trigger instance + """ return HTTPTrigger(obj["url"]) class LocalFunction(Function): + """Function implementation for local execution platform. + + Represents a serverless function running locally in a Docker container. + Handles container management, URL resolution, and memory measurement + process tracking. + + Attributes: + _instance: Docker container running the function + _instance_id: Container ID for the function + _port: Port number the function is listening on + _url: Complete URL for function invocation + _measurement_pid: Optional PID of memory measurement process + """ + def __init__( self, docker_container, @@ -48,6 +126,20 @@ def __init__( config: FunctionConfig, measurement_pid: Optional[int] = None, ): + """Initialize local function. 
+ + Args: + docker_container: Docker container instance running the function + port: Port number the function is listening on + name: Function name + benchmark: Benchmark name this function implements + code_package_hash: Hash of the function code package + config: Function configuration + measurement_pid: Optional PID of memory measurement process + + Raises: + RuntimeError: If container IP address cannot be determined + """ super().__init__(benchmark, name, code_package_hash, config) self._instance = docker_container self._instance_id = docker_container.id @@ -74,25 +166,55 @@ def __init__( @property def container(self) -> docker.models.containers.Container: + """Get the Docker container running this function. + + Returns: + docker.models.containers.Container: The Docker container instance + """ return self._instance @container.setter - def container(self, instance: docker.models.containers.Container): + def container(self, instance: docker.models.containers.Container) -> None: + """Set the Docker container for this function. + + Args: + instance: New Docker container instance + """ self._instance = instance @property def url(self) -> str: + """Get the URL for function invocation. + + Returns: + str: HTTP URL for invoking the function + """ return self._url @property def memory_measurement_pid(self) -> Optional[int]: + """Get the PID of the memory measurement process. + + Returns: + Optional[int]: PID of memory measurement process, or None if not measuring + """ return self._measurement_pid @staticmethod def typename() -> str: + """Get the type name for this function. + + Returns: + str: Type name "Local.LocalFunction" + """ return "Local.LocalFunction" def serialize(self) -> dict: + """Serialize function configuration to dictionary. + + Returns: + dict: Dictionary containing function configuration including container details + """ return { **super().serialize(), "instance_id": self._instance_id, @@ -102,6 +224,17 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LocalFunction": + """Deserialize function from cached configuration. + + Args: + cached_config: Dictionary containing cached function configuration + + Returns: + LocalFunction: Deserialized function instance + + Raises: + RuntimeError: If cached container is no longer available + """ try: instance_id = cached_config["instance_id"] instance = docker.from_env().containers.get(instance_id) @@ -117,7 +250,11 @@ def deserialize(cached_config: dict) -> "LocalFunction": except docker.errors.NotFound: raise RuntimeError(f"Cached container {instance_id} not available anymore!") - def stop(self): + def stop(self) -> None: + """Stop the function container. + + Stops the Docker container running this function with immediate timeout. + """ self.logging.info(f"Stopping function container {self._instance_id}") self._instance.stop(timeout=0) self.logging.info(f"Function container {self._instance_id} stopped succesfully") diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ff..80d40c43 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -1,3 +1,21 @@ +"""Local execution platform for SeBS. + +This module implements the local execution platform for the Serverless Benchmarking +Suite. It runs serverless functions locally using Docker containers, providing a +development and testing environment that mimics serverless execution without requiring +cloud platform deployment. 
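# Illustrative sketch, assuming the docker SDK is installed and a function
# container is already running: read the container's bridge-network address and
# build the HTTP URL used to invoke the locally deployed function. The container
# name and port are placeholders.
import docker

client = docker.from_env()
container = client.containers.get("sebs-local-function")  # placeholder name
container.reload()
ip_address = container.attrs["NetworkSettings"]["IPAddress"]
url = f"http://{ip_address}:{9000}"  # 9000 is the platform's default port
print(url)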
+ +The local platform provides: +- Docker-based function execution +- HTTP triggers for function invocation +- Memory profiling and measurement capabilities +- Port management for multiple concurrent functions +- Cross-platform support (Linux, macOS, Windows) + +Key Classes: + Local: Main system class implementing the local execution platform +""" + import os import requests import shutil @@ -21,43 +39,107 @@ class Local(System): + """Local execution platform implementation. + + Implements the System interface for running serverless functions locally using + Docker containers. Provides development and testing capabilities without requiring + cloud platform deployment. + + This platform supports: + - HTTP-triggered function execution + - Memory measurement and profiling + - Multi-container deployment + - Cross-platform operation (Linux, macOS, Windows) + + Attributes: + DEFAULT_PORT: Default port number for function containers (9000) + _config: Local platform configuration + _remove_containers: Whether to automatically remove containers after use + _memory_measurement_path: Path to memory measurement file + _measure_interval: Interval for memory measurements (-1 disables) + """ DEFAULT_PORT = 9000 @staticmethod - def name(): + def name() -> str: + """Get the platform name. + + Returns: + str: Platform name "local" + """ return "local" @staticmethod - def typename(): + def typename() -> str: + """Get the platform type name. + + Returns: + str: Type name "Local" + """ return "Local" @staticmethod def function_type() -> "Type[Function]": + """Get the function type for this platform. + + Returns: + Type[Function]: LocalFunction class + """ return LocalFunction @property def config(self) -> LocalConfig: + """Get the local platform configuration. + + Returns: + LocalConfig: The platform configuration + """ return self._config @property def remove_containers(self) -> bool: + """Get whether containers are automatically removed. + + Returns: + bool: True if containers are removed after use + """ return self._remove_containers @remove_containers.setter - def remove_containers(self, val: bool): + def remove_containers(self, val: bool) -> None: + """Set whether containers are automatically removed. + + Args: + val: Whether to remove containers after use + """ self._remove_containers = val @property def measure_interval(self) -> int: + """Get the memory measurement interval. + + Returns: + int: Measurement interval in milliseconds, -1 if disabled + """ return self._measure_interval @property def measurements_enabled(self) -> bool: + """Check if memory measurements are enabled. + + Returns: + bool: True if measurements are enabled + """ return self._measure_interval > -1 @property def measurement_path(self) -> Optional[str]: + """Get the path to the memory measurement file. + + Returns: + Optional[str]: Path to measurement file, or None if not set + """ return self._memory_measurement_path def __init__( @@ -68,6 +150,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """Initialize the local execution platform. + + Args: + sebs_config: Global SeBS configuration + config: Local platform configuration + cache_client: Cache client for storing artifacts + docker_client: Docker client for container management + logger_handlers: Logging handlers for output + """ super().__init__( sebs_config, cache_client, @@ -85,31 +176,13 @@ def __init__( self.initialize_resources(select_prefix="local") - """ - Shut down minio storage instance. 
- """ - - def shutdown(self): + def shutdown(self) -> None: + """Shut down the local platform. + + Performs cleanup operations including shutting down any storage instances. + """ super().shutdown() - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module. - - Structure: - function - - function.py - - storage.py - - resources - handler.py - - dir: directory where code is located - benchmark: benchmark name - """ - def package_code( self, directory: str, @@ -120,6 +193,31 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package function code for local execution. + + Creates a compatible code package structure for local execution that + maintains compatibility across cloud providers. Reorganizes files into + a module structure to handle relative imports properly. + + The packaging creates this structure: + - function/ + - function.py + - storage.py + - resources/ + - handler.py + + Args: + directory: Directory containing the function code + language_name: Programming language (e.g., "python", "nodejs") + language_version: Language version (e.g., "3.8", "14") + architecture: Target architecture (unused for local) + benchmark: Benchmark name + is_cached: Whether the package is from cache + container_deployment: Whether using container deployment + + Returns: + Tuple[str, int, str]: (package_path, size_bytes, deployment_package_uri) + """ CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], @@ -143,6 +241,23 @@ def package_code( def _start_container( self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] ) -> LocalFunction: + """Start a Docker container for function execution. + + Creates and starts a Docker container running the function code. Handles + port allocation, environment setup, volume mounting, and health checking. + Optionally starts memory measurement processes. + + Args: + code_package: Benchmark code package to deploy + func_name: Name of the function + func: Optional existing function to update (for restarts) + + Returns: + LocalFunction: Running function instance + + Raises: + RuntimeError: If port allocation fails or container won't start + """ container_name = "{}:run.local.{}.{}".format( self._system_config.docker_repository(), @@ -286,33 +401,61 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LocalFunction": - + """Create a new function deployment. + + Args: + code_package: Benchmark code package to deploy + func_name: Name for the function + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container URI (unused for local) + + Returns: + LocalFunction: Created function instance + + Raises: + NotImplementedError: If container deployment is requested + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Local") return self._start_container(code_package, func_name, None) - """ - Restart Docker container - """ - def update_function( self, function: Function, code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update an existing function with new code. + + Stops the existing container and starts a new one with updated code. 
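+        The existing LocalFunction instance is reused: it is passed to
+        _start_container, which restarts the container with the new code package.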
+ + Args: + function: Existing function to update + code_package: New benchmark code package + container_deployment: Whether to use container deployment (unused) + container_uri: Container URI (unused) + """ func = cast(LocalFunction, function) func.stop() self.logging.info("Allocating a new function container with updated code") self._start_container(code_package, function.name, func) - """ - For local functions, we don't need to do anything for a cached function. - There's only one trigger - HTTP. - """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for function invocation. + + For local functions, only HTTP triggers are supported. + + Args: + func: Function to create trigger for + trigger_type: Type of trigger to create + + Returns: + Trigger: Created trigger instance + + Raises: + RuntimeError: If trigger type is not HTTP + """ from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) @@ -326,10 +469,26 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T self.cache_client.update_function(function) return trigger - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Handle cached function setup. + + For local functions, no special handling is needed for cached functions. + + Args: + function: Cached function instance + """ pass - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """Update function configuration. + + Args: + function: Function to update + code_package: Benchmark code package + + Raises: + RuntimeError: Always raised as configuration updates are not supported + """ self.logging.error("Updating function configuration of local deployment is not supported") raise RuntimeError("Updating function configuration of local deployment is not supported") @@ -340,16 +499,47 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """Download execution metrics. + + For local execution, metrics are not available from the platform. + + Args: + function_name: Name of the function + start_time: Start time for metrics collection + end_time: End time for metrics collection + requests: Execution requests to collect metrics for + metrics: Dictionary to store collected metrics + """ pass - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for functions. + + Args: + functions: List of functions to enforce cold start on + code_package: Benchmark code package + + Raises: + NotImplementedError: Cold start enforcement is not implemented for local + """ raise NotImplementedError() @staticmethod def default_function_name( code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate default function name. + + Creates a standardized function name based on the code package and resources. 
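+        Names typically take the form
+        "sebs-<resources id>-<benchmark>-<language>-<version>" when a resources
+        instance is provided.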
+ + Args: + code_package: Benchmark code package + resources: Optional resources instance for ID inclusion + + Returns: + str: Generated function name + """ # Create function name if resources is not None: func_name = "sebs-{}-{}-{}-{}".format( @@ -368,10 +558,30 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name for platform requirements. + + For local execution, no formatting is needed. + + Args: + func_name: Function name to format + + Returns: + str: Formatted function name (unchanged for local) + """ return func_name def start_measurements(self, measure_interval: int) -> Optional[str]: - + """Start memory measurements for function containers. + + Creates a temporary file for storing memory measurements and enables + measurement collection at the specified interval. + + Args: + measure_interval: Measurement interval in milliseconds + + Returns: + Optional[str]: Path to measurement file, or None if measurements disabled + """ self._measure_interval = measure_interval if not self.measurements_enabled: diff --git a/sebs/local/measureMem.py b/sebs/local/measureMem.py index 74cae636..9c3ac66b 100644 --- a/sebs/local/measureMem.py +++ b/sebs/local/measureMem.py @@ -1,9 +1,20 @@ -""" -Measure memory consumption of a specified docker container. +"""Memory measurement utility for Docker containers. + +This module provides functionality to measure memory consumption of Docker containers +running local serverless functions. It reads memory usage from the cgroup filesystem +at regular intervals and writes the measurements to a file for later analysis. + +The measurement process: +1. Reads memory.current from the container's cgroup +2. Records the measurement with container ID and timestamp +3. Tracks precision errors when measurement intervals are exceeded +4. Continues until the container stops or process is terminated + +Functions: + measure: Main measurement function that continuously monitors container memory -Specifically, the pseudofile memory.current from the cgroup -pseudo-filesystem is read by a shell command (cat) every few -milliseconds while the container is running. +Usage: + python measureMem.py --container-id --measure-interval --measurement-file """ import subprocess @@ -12,7 +23,22 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> None: - + """Continuously measure memory consumption of a Docker container. + + Reads memory usage from the container's cgroup filesystem at regular intervals + and writes measurements to the specified file. Handles different cgroup paths + for compatibility with various Docker configurations. + + Args: + container_id: Docker container ID to monitor + measure_interval: Measurement interval in milliseconds + measurement_file: Path to file for writing measurements + + Note: + This function runs indefinitely until the process is terminated. + It attempts two different cgroup paths to accommodate different + Docker/systemd configurations. + """ f = open(measurement_file, "a") while True: @@ -33,13 +59,34 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> time.sleep(max(0, (measure_interval - iter_duration / 1e6) / 1000)) -""" - Parse container ID and measure interval and start memory measurement process. 
-""" if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--container-id", type=str) - parser.add_argument("--measurement-file", type=str) - parser.add_argument("--measure-interval", type=int) + """Parse command line arguments and start memory measurement process. + + Command line arguments: + --container-id: Docker container ID to monitor + --measurement-file: Path to file for writing measurements + --measure-interval: Measurement interval in milliseconds + """ + parser = argparse.ArgumentParser( + description="Measure memory consumption of a Docker container" + ) + parser.add_argument( + "--container-id", + type=str, + required=True, + help="Docker container ID to monitor" + ) + parser.add_argument( + "--measurement-file", + type=str, + required=True, + help="Path to file for writing measurements" + ) + parser.add_argument( + "--measure-interval", + type=int, + required=True, + help="Measurement interval in milliseconds" + ) args, unknown = parser.parse_known_args() measure(args.container_id, args.measure_interval, args.measurement_file) diff --git a/sebs/openwhisk/__init__.py b/sebs/openwhisk/__init__.py index 614d9443..572c592c 100644 --- a/sebs/openwhisk/__init__.py +++ b/sebs/openwhisk/__init__.py @@ -1,2 +1,28 @@ +"""Apache OpenWhisk integration module for SeBS. + +This module provides the complete OpenWhisk integration for the Serverless +Benchmarking Suite (SeBS), including platform-specific implementations for +function deployment, configuration management, and execution. + +The module includes: +- OpenWhisk system integration and function management +- Configuration classes for credentials and resources +- Function and trigger implementations +- Docker container management +- CLI and HTTP-based invocation methods + +Main Classes: + OpenWhisk: Main OpenWhisk system implementation + OpenWhiskConfig: Configuration management for OpenWhisk deployments + OpenWhiskFunction: OpenWhisk-specific function implementation + LibraryTrigger: CLI-based function invocation + HTTPTrigger: HTTP-based function invocation + +Example: + >>> from sebs.openwhisk import OpenWhisk, OpenWhiskConfig + >>> config = OpenWhiskConfig.deserialize(config_dict, cache, handlers) + >>> system = OpenWhisk(sys_config, config, cache, docker_client, handlers) +""" + from .openwhisk import OpenWhisk # noqa from .config import OpenWhiskConfig # noqa diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index bba54f7c..d16815c8 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -1,30 +1,95 @@ +""" +Configuration management for Apache OpenWhisk deployments in SeBS. + +This module provides configuration classes for managing OpenWhisk-specific settings, +credentials, and resources. It handles Docker registry configuration, storage settings, +and deployment parameters for OpenWhisk serverless functions. + +Classes: + OpenWhiskCredentials: Manages authentication credentials for OpenWhisk + OpenWhiskResources: Handles Docker registry and storage resources + OpenWhiskConfig: Main configuration class for OpenWhisk deployment settings +""" + from __future__ import annotations +from typing import Optional, cast, Dict, Any + from sebs.cache import Cache from sebs.faas.config import Credentials, Resources, Config from sebs.utils import LoggingHandlers from sebs.storage.resources import SelfHostedResources -from typing import cast, Optional - class OpenWhiskCredentials(Credentials): + """ + Manages authentication credentials for OpenWhisk deployments. 
+ + This class handles credential serialization and deserialization for OpenWhisk + platforms. Currently implements a minimal credential system. + + Note: + OpenWhisk deployments typically rely on local authentication through + the wsk CLI tool rather than explicit credential management. + """ + @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Credentials: + """ + Deserialize OpenWhisk credentials from configuration. + + Args: + config: Configuration dictionary containing credential data + cache: Cache instance for storing/retrieving cached credentials + handlers: Logging handlers for credential operations + + Returns: + OpenWhiskCredentials instance (currently empty) + """ return OpenWhiskCredentials() - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize credentials to dictionary format. + + Returns: + Empty dictionary as OpenWhisk uses CLI-based authentication + """ return {} class OpenWhiskResources(SelfHostedResources): + """ + Manages Docker registry and storage resources for OpenWhisk deployments. + + This class handles configuration of Docker registries, authentication credentials, + and storage resources needed for OpenWhisk function deployments. It supports + both user-provided and cached configurations. + + Attributes: + _docker_registry: Docker registry URL for storing function images + _docker_username: Username for Docker registry authentication + _docker_password: Password for Docker registry authentication + _registry_updated: Flag indicating if registry configuration has been updated + _storage_updated: Flag indicating if storage configuration has been updated + """ + def __init__( self, registry: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, registry_updated: bool = False, - ): + ) -> None: + """ + Initialize OpenWhisk resources configuration. + + Args: + registry: Docker registry URL for storing function images + username: Username for Docker registry authentication + password: Password for Docker registry authentication + registry_updated: Whether registry configuration has been updated + """ super().__init__(name="openwhisk") self._docker_registry = registry if registry != "" else None self._docker_username = username if username != "" else None @@ -34,38 +99,95 @@ def __init__( @staticmethod def typename() -> str: + """ + Get the type name for this resource class. + + Returns: + String identifier for OpenWhisk resources + """ return "OpenWhisk.Resources" @property def docker_registry(self) -> Optional[str]: + """ + Get the Docker registry URL. + + Returns: + Docker registry URL or None if not configured + """ return self._docker_registry @property def docker_username(self) -> Optional[str]: + """ + Get the Docker registry username. + + Returns: + Docker registry username or None if not configured + """ return self._docker_username @property def docker_password(self) -> Optional[str]: + """ + Get the Docker registry password. + + Returns: + Docker registry password or None if not configured + """ return self._docker_password @property def storage_updated(self) -> bool: + """ + Check if storage configuration has been updated. + + Returns: + True if storage configuration has been updated, False otherwise + """ return self._storage_updated @property def registry_updated(self) -> bool: + """ + Check if registry configuration has been updated. 
+ + Returns: + True if registry configuration has been updated, False otherwise + """ return self._registry_updated @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: Dict[str, Any]) -> None: + """ + Initialize OpenWhisk resources from dictionary configuration. + + Args: + res: Resources instance to initialize + dct: Dictionary containing Docker registry configuration + Expected keys: 'registry', 'username', 'password' + """ ret = cast(OpenWhiskResources, res) ret._docker_registry = dct["registry"] ret._docker_username = dct["username"] ret._docker_password = dct["password"] @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: - + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Resources: + """ + Deserialize OpenWhisk resources from configuration. + + This method handles both user-provided configuration and cached values, + prioritizing user configuration while detecting updates. + + Args: + config: Configuration dictionary that may contain 'docker_registry' section + cache: Cache instance to retrieve/store configuration + handlers: Logging handlers for resource operations + + Returns: + OpenWhiskResources instance with appropriate configuration + """ cached_config = cache.get_config("openwhisk") ret = OpenWhiskResources() if cached_config: @@ -108,7 +230,13 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """ + Update cache with current resource configuration. + + Args: + cache: Cache instance to update with current configuration + """ super().update_cache(cache) cache.update_config( val=self.docker_registry, keys=["openwhisk", "resources", "docker", "registry"] @@ -120,8 +248,15 @@ def update_cache(self, cache: Cache): val=self.docker_password, keys=["openwhisk", "resources", "docker", "password"] ) - def serialize(self) -> dict: - out: dict = { + def serialize(self) -> Dict[str, Any]: + """ + Serialize resource configuration to dictionary. + + Returns: + Dictionary containing all resource configuration including + Docker registry settings and inherited storage configuration + """ + out: Dict[str, Any] = { **super().serialize(), "docker_registry": self.docker_registry, "docker_username": self.docker_username, @@ -131,11 +266,41 @@ def serialize(self) -> dict: class OpenWhiskConfig(Config): + """ + Main configuration class for OpenWhisk deployments. + + This class manages all OpenWhisk-specific configuration settings including + cluster management, WSK CLI settings, and experimental features. It integrates + with the broader SeBS configuration system. 
+ + Attributes: + name: Platform name identifier ('openwhisk') + shutdownStorage: Whether to shutdown storage after experiments + removeCluster: Whether to remove cluster after experiments + wsk_exec: Path to WSK CLI executable + wsk_bypass_security: Whether to bypass security checks in WSK CLI + experimentalManifest: Whether to use experimental manifest features + cache: Cache instance for configuration persistence + _credentials: OpenWhisk credentials configuration + _resources: OpenWhisk resources configuration + """ + name: str shutdownStorage: bool + removeCluster: bool + wsk_exec: str + wsk_bypass_security: bool + experimentalManifest: bool cache: Cache - def __init__(self, config: dict, cache: Cache): + def __init__(self, config: Dict[str, Any], cache: Cache) -> None: + """ + Initialize OpenWhisk configuration. + + Args: + config: Configuration dictionary containing OpenWhisk settings + cache: Cache instance for configuration persistence + """ super().__init__(name="openwhisk") self._credentials = OpenWhiskCredentials() self._resources = OpenWhiskResources() @@ -148,17 +313,43 @@ def __init__(self, config: dict, cache: Cache): @property def credentials(self) -> OpenWhiskCredentials: + """ + Get OpenWhisk credentials configuration. + + Returns: + OpenWhiskCredentials instance + """ return self._credentials @property def resources(self) -> OpenWhiskResources: + """ + Get OpenWhisk resources configuration. + + Returns: + OpenWhiskResources instance + """ return self._resources @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: Dict[str, Any]) -> None: + """ + Initialize configuration from dictionary (currently no-op). + + Args: + cfg: Configuration instance to initialize + dct: Dictionary containing initialization data + """ pass - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize configuration to dictionary format. + + Returns: + Dictionary containing all OpenWhisk configuration settings + including credentials and resources + """ return { "name": "openwhisk", "shutdownStorage": self.shutdownStorage, @@ -171,7 +362,18 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize OpenWhisk configuration from dictionary and cache. + + Args: + config: Configuration dictionary containing OpenWhisk settings + cache: Cache instance to retrieve cached configuration + handlers: Logging handlers for configuration operations + + Returns: + OpenWhiskConfig instance with deserialized configuration + """ cached_config = cache.get_config("openwhisk") resources = cast( OpenWhiskResources, OpenWhiskResources.deserialize(config, cache, handlers) @@ -182,7 +384,13 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config res._resources = resources return res - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """ + Update cache with current configuration values. 
+ + Args: + cache: Cache instance to update with current configuration + """ cache.update_config(val=self.shutdownStorage, keys=["openwhisk", "shutdownStorage"]) cache.update_config(val=self.removeCluster, keys=["openwhisk", "removeCluster"]) cache.update_config(val=self.wsk_exec, keys=["openwhisk", "wskExec"]) diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index 2dd27717..be7fa1b2 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -1,3 +1,13 @@ +"""Docker container management for OpenWhisk functions in SeBS. + +This module provides OpenWhisk-specific Docker container management functionality, +handling Docker image registry configuration, image tagging, and repository naming +for OpenWhisk function deployments. + +Classes: + OpenWhiskContainer: OpenWhisk-specific Docker container management +""" + import docker from typing import Tuple @@ -7,12 +17,40 @@ class OpenWhiskContainer(DockerContainer): + """ + OpenWhisk-specific Docker container management. + + This class extends the base DockerContainer to provide OpenWhisk-specific + functionality for managing Docker images, registries, and container deployment. + It handles Docker registry authentication and image URI generation for + OpenWhisk function deployments. + + Attributes: + config: OpenWhisk configuration containing registry settings + + Example: + >>> container = OpenWhiskContainer(sys_config, ow_config, docker_client, True) + >>> registry, repo, tag, uri = container.registry_name("benchmark", "python", "3.8", "x86_64") + """ + @staticmethod def name() -> str: + """ + Get the platform name identifier. + + Returns: + Platform name as string + """ return "openwhisk" @staticmethod def typename() -> str: + """ + Get the container type name. + + Returns: + Container type name as string + """ return "OpenWhisk.Container" def __init__( @@ -21,14 +59,46 @@ def __init__( config: OpenWhiskConfig, docker_client: docker.client, experimental_manifest: bool, - ): + ) -> None: + """ + Initialize OpenWhisk container manager. + + Args: + system_config: Global SeBS system configuration + config: OpenWhisk-specific configuration settings + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest features + """ super().__init__(system_config, docker_client, experimental_manifest) self.config = config def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: - + """ + Generate Docker registry information for a benchmark image. + + This method creates the appropriate registry name, repository name, image tag, + and complete image URI based on the benchmark parameters and OpenWhisk + configuration. It handles both custom registries and Docker Hub. 
+ + Args: + benchmark: Name of the benchmark + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x86_64') + + Returns: + Tuple containing: + - Registry name (e.g., "my-registry.com" or "Docker Hub") + - Full repository name with registry prefix + - Image tag + - Complete image URI + + Example: + >>> registry, repo, tag, uri = container.registry_name("test", "python", "3.8", "x86_64") + >>> # Returns: ("Docker Hub", "sebs", "openwhisk-test-python-3.8-x86_64", "sebs:openwhisk-test-python-3.8-x86_64") + """ registry_name = self.config.resources.docker_registry # We need to retag created images when pushing to registry other diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py index daf851ca..685797f9 100644 --- a/sebs/openwhisk/function.py +++ b/sebs/openwhisk/function.py @@ -1,6 +1,17 @@ +"""OpenWhisk function and configuration classes for SeBS. + +This module provides OpenWhisk-specific implementations of function configuration +and function management for the SeBS benchmarking framework. It handles function +configuration serialization, Docker image management, and storage integration. + +Classes: + OpenWhiskFunctionConfig: Configuration data class for OpenWhisk functions + OpenWhiskFunction: OpenWhisk-specific function implementation +""" + from __future__ import annotations -from typing import cast, Optional +from typing import cast, Optional, Dict, Any from dataclasses import dataclass from sebs.benchmark import Benchmark @@ -10,6 +21,23 @@ @dataclass class OpenWhiskFunctionConfig(FunctionConfig): + """ + Configuration data class for OpenWhisk functions. + + This class extends the base FunctionConfig to include OpenWhisk-specific + configuration parameters such as Docker image information, namespace settings, + and storage configurations for both object and NoSQL storage. + + Attributes: + docker_image: Docker image URI used for the function deployment + namespace: OpenWhisk namespace (default: "_" for default namespace) + object_storage: Minio object storage configuration if required + nosql_storage: ScyllaDB NoSQL storage configuration if required + + Note: + The docker_image attribute should be merged with higher-level + image abstraction in future refactoring. + """ # FIXME: merge with higher level abstraction for images docker_image: str = "" @@ -18,7 +46,16 @@ class OpenWhiskFunctionConfig(FunctionConfig): nosql_storage: Optional[ScyllaDBConfig] = None @staticmethod - def deserialize(data: dict) -> OpenWhiskFunctionConfig: + def deserialize(data: Dict[str, Any]) -> OpenWhiskFunctionConfig: + """ + Deserialize configuration from dictionary data. + + Args: + data: Dictionary containing serialized configuration data + + Returns: + OpenWhiskFunctionConfig instance with deserialized data + """ keys = list(OpenWhiskFunctionConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} data["runtime"] = Runtime.deserialize(data["runtime"]) @@ -26,35 +63,105 @@ def deserialize(data: dict) -> OpenWhiskFunctionConfig: data["nosql_storage"] = ScyllaDBConfig.deserialize(data["nosql_storage"]) return OpenWhiskFunctionConfig(**data) - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize configuration to dictionary format. 
+ + Returns: + Dictionary containing all configuration data + """ return self.__dict__ @staticmethod def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: + """ + Create configuration from benchmark specification. + + Args: + benchmark: Benchmark instance containing configuration requirements + + Returns: + OpenWhiskFunctionConfig instance initialized from benchmark + """ return super(OpenWhiskFunctionConfig, OpenWhiskFunctionConfig)._from_benchmark( benchmark, OpenWhiskFunctionConfig ) class OpenWhiskFunction(Function): + """ + OpenWhisk-specific function implementation for SeBS. + + This class provides OpenWhisk-specific function management including + configuration handling, serialization, and trigger management. It integrates + with OpenWhisk actions and maintains Docker image information. + + Attributes: + _cfg: OpenWhisk-specific function configuration + + Example: + >>> config = OpenWhiskFunctionConfig.from_benchmark(benchmark) + >>> function = OpenWhiskFunction("test-func", "benchmark-name", "hash123", config) + """ + def __init__( self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig - ): + ) -> None: + """ + Initialize OpenWhisk function. + + Args: + name: Function name (OpenWhisk action name) + benchmark: Name of the benchmark this function implements + code_package_hash: Hash of the code package for cache validation + cfg: OpenWhisk-specific function configuration + """ super().__init__(benchmark, name, code_package_hash, cfg) @property def config(self) -> OpenWhiskFunctionConfig: + """ + Get OpenWhisk-specific function configuration. + + Returns: + OpenWhiskFunctionConfig instance with current settings + """ return cast(OpenWhiskFunctionConfig, self._cfg) @staticmethod def typename() -> str: + """ + Get the type name for this function class. + + Returns: + String identifier for OpenWhisk functions + """ return "OpenWhisk.Function" - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize function to dictionary format. + + Returns: + Dictionary containing function data and OpenWhisk-specific configuration + """ return {**super().serialize(), "config": self._cfg.serialize()} @staticmethod - def deserialize(cached_config: dict) -> OpenWhiskFunction: + def deserialize(cached_config: Dict[str, Any]) -> OpenWhiskFunction: + """ + Deserialize function from cached configuration data. + + Args: + cached_config: Dictionary containing cached function configuration + and trigger information + + Returns: + OpenWhiskFunction instance with deserialized configuration and triggers + + Raises: + AssertionError: If unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 9c196fe2..bb9f4be0 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -1,6 +1,18 @@ +""" +Apache OpenWhisk serverless platform implementation for SeBS. + +This module provides the main OpenWhisk system class that integrates OpenWhisk +serverless platform with the SeBS benchmarking framework. It handles function +deployment, execution, monitoring, and resource management for OpenWhisk clusters. + +The implementation supports both local and remote OpenWhisk deployments, +Docker-based function packaging, and various trigger types including HTTP +and library-based invocations. 
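+
+Example:
+    A sketch of deploying and invoking a benchmark (assumes that configuration,
+    cache, benchmark, and Docker client objects have already been constructed):
+
+    >>> system = OpenWhisk(sys_config, config, cache, docker_client, handlers)
+    >>> function = system.create_function(benchmark, "test-func", True, "image:tag")
+    >>> trigger = system.create_trigger(function, Trigger.TriggerType.LIBRARY)
+    >>> result = trigger.sync_invoke({"key": "value"})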
+""" + import os import subprocess -from typing import cast, Dict, List, Optional, Tuple, Type +from typing import cast, Dict, List, Optional, Tuple, Type, Any import docker @@ -21,6 +33,23 @@ class OpenWhisk(System): + """ + Apache OpenWhisk serverless platform implementation for SeBS. + + This class provides the main integration between SeBS and Apache OpenWhisk, + handling function deployment, execution, container management, and resource + allocation. It supports both local and remote OpenWhisk deployments with + Docker-based function packaging. + + Attributes: + _config: OpenWhisk-specific configuration settings + container_client: Docker container client for function packaging + logging_handlers: Logging handlers for the OpenWhisk system + + Example: + >>> openwhisk = OpenWhisk(sys_config, ow_config, cache, docker_client, handlers) + >>> function = openwhisk.create_function(benchmark, "test-func", True, "image:tag") + """ _config: OpenWhiskConfig def __init__( @@ -30,7 +59,17 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """ + Initialize OpenWhisk system with configuration and clients. + + Args: + system_config: Global SeBS system configuration + config: OpenWhisk-specific configuration settings + cache_client: Cache client for storing function and resource data + docker_client: Docker client for container operations + logger_handlers: Logging handlers for system operations + """ super().__init__( system_config, cache_client, @@ -59,14 +98,33 @@ def __init__( password=self.config.resources.docker_password, ) - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None) -> None: + """ + Initialize OpenWhisk system resources. + + Args: + config: Additional configuration parameters (currently unused) + resource_prefix: Optional prefix for resource naming + """ self.initialize_resources(select_prefix=resource_prefix) @property def config(self) -> OpenWhiskConfig: + """ + Get OpenWhisk configuration. + + Returns: + OpenWhisk configuration instance + """ return self._config def shutdown(self) -> None: + """ + Shutdown OpenWhisk system and clean up resources. + + This method stops storage services if configured and optionally + removes the OpenWhisk cluster based on configuration settings. + """ if hasattr(self, "storage") and self.config.shutdownStorage: self.storage.stop() if self.config.removeCluster: @@ -77,17 +135,41 @@ def shutdown(self) -> None: @staticmethod def name() -> str: + """ + Get the platform name identifier. + + Returns: + Platform name as string + """ return "openwhisk" @staticmethod - def typename(): + def typename() -> str: + """ + Get the platform type name. + + Returns: + Platform type name as string + """ return "OpenWhisk" @staticmethod def function_type() -> "Type[Function]": + """ + Get the function type for this platform. + + Returns: + OpenWhiskFunction class type + """ return OpenWhiskFunction def get_wsk_cmd(self) -> List[str]: + """ + Get the WSK CLI command with appropriate flags. + + Returns: + List of command arguments for WSK CLI execution + """ cmd = [self.config.wsk_exec] if self.config.wsk_bypass_security: cmd.append("-i") @@ -103,6 +185,31 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Package benchmark code for OpenWhisk deployment. 
+ + Creates both a Docker image and a ZIP archive containing the benchmark code. + The ZIP archive is required for OpenWhisk function registration even when + using Docker-based deployment. + + Args: + directory: Path to the benchmark code directory + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x86_64') + benchmark: Benchmark name + is_cached: Whether Docker image is already cached + container_deployment: Whether to use container-based deployment + + Returns: + Tuple containing: + - Path to created ZIP archive + - Size of ZIP archive in bytes + - Docker image URI + + Raises: + RuntimeError: If packaging fails + """ # Regardless of Docker image status, we need to create .zip file # to allow registration of function with OpenWhisk @@ -128,6 +235,18 @@ def package_code( return benchmark_archive, bytes_size, image_uri def storage_arguments(self, code_package: Benchmark) -> List[str]: + """ + Generate storage-related arguments for function deployment. + + Creates WSK CLI parameters for Minio object storage and ScyllaDB NoSQL + storage configurations based on the benchmark requirements. + + Args: + code_package: Benchmark configuration requiring storage access + + Returns: + List of WSK CLI parameter arguments for storage configuration + """ envs = [] if self.config.resources.storage_config: @@ -169,6 +288,25 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "OpenWhiskFunction": + """ + Create or retrieve an OpenWhisk function (action). + + This method checks if a function already exists and updates it if necessary, + or creates a new function with the appropriate configuration, storage settings, + and Docker image. + + Args: + code_package: Benchmark configuration and code package + func_name: Name for the OpenWhisk action + container_deployment: Whether to use container-based deployment + container_uri: URI of the Docker image for the function + + Returns: + OpenWhiskFunction instance configured with LibraryTrigger + + Raises: + RuntimeError: If WSK CLI is not accessible or function creation fails + """ self.logging.info("Creating function as an action in OpenWhisk.") try: actions = subprocess.run( @@ -254,7 +392,19 @@ def update_function( code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """ + Update an existing OpenWhisk function with new code and configuration. + + Args: + function: Existing function to update + code_package: New benchmark configuration and code package + container_deployment: Whether to use container-based deployment + container_uri: URI of the new Docker image + + Raises: + RuntimeError: If WSK CLI is not accessible or update fails + """ self.logging.info(f"Update an existing OpenWhisk action {function.name}.") function = cast(OpenWhiskFunction, function) docker_image = self.system_config.benchmark_image_name( @@ -297,7 +447,20 @@ def update_function( self.logging.error(f"Output: {e.stderr.decode('utf-8')}") raise RuntimeError(e) - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """ + Update configuration of an existing OpenWhisk function. + + Updates memory allocation, timeout, and storage parameters without + changing the function code or Docker image. 
+ + Args: + function: Function to update configuration for + code_package: New benchmark configuration settings + + Raises: + RuntimeError: If WSK CLI is not accessible or configuration update fails + """ self.logging.info(f"Update configuration of an existing OpenWhisk action {function.name}.") try: subprocess.run( @@ -326,6 +489,19 @@ def update_function_configuration(self, function: Function, code_package: Benchm raise RuntimeError(e) def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + """ + Check if function configuration has changed compared to cached version. + + Compares current benchmark configuration and storage settings with the + cached function configuration to determine if an update is needed. + + Args: + cached_function: Previously cached function configuration + benchmark: Current benchmark configuration to compare against + + Returns: + True if configuration has changed and function needs updating + """ changed = super().is_configuration_changed(cached_function, benchmark) storage = cast(Minio, self.system_resources.get_storage()) @@ -353,13 +529,33 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate default function name based on benchmark and resource configuration. + + Args: + code_package: Benchmark package containing name and language info + resources: Optional specific resources to use for naming + + Returns: + Generated function name string + """ resource_id = resources.resources_id if resources else self.config.resources.resources_id return ( f"sebs-{resource_id}-{code_package.benchmark}-" f"{code_package.language_name}-{code_package.language_version}" ) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """ + Enforce cold start for functions (not implemented for OpenWhisk). + + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark package configuration + + Raises: + NotImplementedError: Cold start enforcement not implemented for OpenWhisk + """ raise NotImplementedError() def download_metrics( @@ -369,10 +565,36 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """ + Download metrics for function executions (no-op for OpenWhisk). + + Args: + function_name: Name of the function to download metrics for + start_time: Start time for metrics collection (epoch timestamp) + end_time: End time for metrics collection (epoch timestamp) + requests: Dictionary mapping request IDs to execution results + metrics: Dictionary to store downloaded metrics + + Note: + OpenWhisk metrics collection is not currently implemented. + """ pass def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for function invocation. 
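+
+        For LIBRARY triggers, the trigger registered during function creation is
+        returned; HTTP triggers expose the action as a web action and require the
+        wsk CLI to be accessible.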
+ + Args: + function: Function to create trigger for + trigger_type: Type of trigger to create (LIBRARY or HTTP) + + Returns: + Created trigger instance + + Raises: + RuntimeError: If WSK CLI is not accessible or trigger type not supported + """ if trigger_type == Trigger.TriggerType.LIBRARY: return function.triggers(Trigger.TriggerType.LIBRARY)[0] elif trigger_type == Trigger.TriggerType.HTTP: @@ -398,12 +620,26 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) else: raise RuntimeError("Not supported!") - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """ + Configure a cached function with current system settings. + + Updates triggers with current logging handlers and WSK command configuration. + + Args: + function: Cached function to configure + """ for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).wsk_cmd = self.get_wsk_cmd() for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers - def disable_rich_output(self): + def disable_rich_output(self) -> None: + """ + Disable rich output formatting for container operations. + + This is useful for non-interactive environments or when plain text + output is preferred. + """ self.container_client.disable_rich_output = True diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py index f0d8260b..1dd3bb1d 100644 --- a/sebs/openwhisk/triggers.py +++ b/sebs/openwhisk/triggers.py @@ -1,14 +1,48 @@ +"""Trigger implementations for OpenWhisk function invocation in SeBS. + +This module provides different trigger types for invoking OpenWhisk functions, +including library-based (CLI) triggers and HTTP-based triggers. Each trigger +type handles the specific invocation method and result parsing for OpenWhisk. + +Classes: + LibraryTrigger: CLI-based function invocation using wsk tool + HTTPTrigger: HTTP-based function invocation using web actions +""" + import concurrent.futures import datetime import json import subprocess -from typing import Dict, List, Optional # noqa +from typing import Dict, List, Optional, Any # noqa from sebs.faas.function import ExecutionResult, Trigger class LibraryTrigger(Trigger): - def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None): + """ + CLI-based trigger for OpenWhisk function invocation. + + This trigger uses the wsk CLI tool to invoke OpenWhisk actions directly, + providing synchronous and asynchronous invocation capabilities. It handles + parameter passing and result parsing for CLI-based invocations. + + Attributes: + fname: Name of the OpenWhisk action to invoke + _wsk_cmd: Complete WSK CLI command for function invocation + + Example: + >>> trigger = LibraryTrigger("my-function", ["wsk", "-i"]) + >>> result = trigger.sync_invoke({"key": "value"}) + """ + + def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None) -> None: + """ + Initialize library trigger for OpenWhisk function. + + Args: + fname: Name of the OpenWhisk action to invoke + wsk_cmd: Optional WSK CLI command prefix (including flags) + """ super().__init__() self.fname = fname if wsk_cmd: @@ -16,19 +50,53 @@ def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None): @staticmethod def trigger_type() -> "Trigger.TriggerType": + """ + Get the trigger type identifier. 
+ + Returns: + TriggerType.LIBRARY for CLI-based invocation + """ return Trigger.TriggerType.LIBRARY @property def wsk_cmd(self) -> List[str]: + """ + Get the complete WSK CLI command for invocation. + + Returns: + List of command arguments for WSK CLI invocation + + Raises: + AssertionError: If wsk_cmd has not been set + """ assert self._wsk_cmd return self._wsk_cmd @wsk_cmd.setter - def wsk_cmd(self, wsk_cmd: List[str]): + def wsk_cmd(self, wsk_cmd: List[str]) -> None: + """ + Set the WSK CLI command prefix. + + Args: + wsk_cmd: WSK CLI command prefix (including any flags) + """ self._wsk_cmd = [*wsk_cmd, "action", "invoke", "--result", self.fname] @staticmethod - def get_command(payload: dict) -> List[str]: + def get_command(payload: Dict[str, Any]) -> List[str]: + """ + Convert payload dictionary to WSK CLI parameter arguments. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + List of CLI arguments for passing parameters to WSK + + Example: + >>> get_command({"key1": "value1", "key2": 42}) + ["--param", "key1", '"value1"', "--param", "key2", "42"] + """ params = [] for key, value in payload.items(): params.append("--param") @@ -36,7 +104,16 @@ def get_command(payload: dict) -> List[str]: params.append(json.dumps(value)) return params - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: + """ + Synchronously invoke the OpenWhisk function via CLI. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + ExecutionResult containing timing information and function output + """ command = self.wsk_cmd + self.get_command(payload) error = None try: @@ -63,49 +140,147 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: openwhisk_result.parse_benchmark_output(return_content) return openwhisk_result - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: + """ + Asynchronously invoke the OpenWhisk function via CLI. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + Future object that will contain the ExecutionResult + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """ + Serialize trigger configuration to dictionary. + + Returns: + Dictionary containing trigger type and function name + """ return {"type": "Library", "name": self.fname} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict[str, str]) -> Trigger: + """ + Deserialize trigger from configuration dictionary. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) @staticmethod def typename() -> str: + """ + Get the trigger type name. + + Returns: + String identifier for this trigger type + """ return "OpenWhisk.LibraryTrigger" class HTTPTrigger(Trigger): - def __init__(self, fname: str, url: str): + """ + HTTP-based trigger for OpenWhisk web action invocation. + + This trigger uses HTTP requests to invoke OpenWhisk web actions, + providing an alternative to CLI-based invocation. It inherits HTTP + invocation capabilities from the base Trigger class. 
+ + Attributes: + fname: Name of the OpenWhisk action + url: HTTP URL for the web action endpoint + + Example: + >>> trigger = HTTPTrigger("my-function", "https://openwhisk.example.com/api/v1/web/guest/default/my-function.json") + >>> result = trigger.sync_invoke({"key": "value"}) + """ + + def __init__(self, fname: str, url: str) -> None: + """ + Initialize HTTP trigger for OpenWhisk web action. + + Args: + fname: Name of the OpenWhisk action + url: HTTP URL for the web action endpoint + """ super().__init__() self.fname = fname self.url = url @staticmethod def typename() -> str: + """ + Get the trigger type name. + + Returns: + String identifier for this trigger type + """ return "OpenWhisk.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """ + Get the trigger type identifier. + + Returns: + TriggerType.HTTP for HTTP-based invocation + """ return Trigger.TriggerType.HTTP - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: + """ + Synchronously invoke the OpenWhisk function via HTTP. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + ExecutionResult containing timing information and function output + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url, False) - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: + """ + Asynchronously invoke the OpenWhisk function via HTTP. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + Future object that will contain the ExecutionResult + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """ + Serialize trigger configuration to dictionary. + + Returns: + Dictionary containing trigger type, function name, and URL + """ return {"type": "HTTP", "fname": self.fname, "url": self.url} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict[str, str]) -> Trigger: + """ + Deserialize trigger from configuration dictionary. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + HTTPTrigger instance + """ return HTTPTrigger(obj["fname"], obj["url"]) diff --git a/sebs/statistics.py b/sebs/statistics.py index 9189a787..9598b834 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -56,24 +56,24 @@ def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: """Compute non-parametric confidence interval using Le Boudec's method. - + This function computes a confidence interval for the median of the given measurement times using the method described by Le Boudec. This is a non-parametric method that does not assume any particular distribution of the data. - + Reference: J.-Y. 
Le Boudec, "Methods for the Estimation of the Accuracy of Measurements in Computer Performance Evaluation", Performance Evaluation Review, 2010 - + Args: alpha: Confidence level (e.g., 0.95 for 95% confidence) times: List of measurement times - + Returns: A tuple (lower, upper) representing the confidence interval - + Raises: AssertionError: If an unsupported confidence level is provided """ diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py index e69de29b..969ebac6 100644 --- a/sebs/storage/__init__.py +++ b/sebs/storage/__init__.py @@ -0,0 +1,33 @@ +"""Storage module for the Serverless Benchmarking Suite (SeBS). + +This module provides storage abstractions and implementations for SeBS, supporting +both object storage (S3-compatible) and NoSQL database storage. It includes: + +- Configuration classes for different storage backends +- MinIO implementation for local S3-compatible object storage +- ScyllaDB implementation for local DynamoDB-compatible NoSQL storage +- Resource management classes for self-hosted storage deployments + +The storage module enables benchmarks to work with persistent data storage +across different deployment environments while maintaining consistent interfaces. + +Key Components: + - config: Configuration dataclasses for storage backends + - minio: MinIO-based object storage implementation + - scylladb: ScyllaDB-based NoSQL storage implementation + - resources: Resource management for self-hosted storage deployments + +Example: + To use MinIO object storage in a benchmark: + + ```python + from sebs.storage.minio import Minio + from sebs.storage.config import MinioConfig + + # Configure and start MinIO + config = MinioConfig(mapped_port=9000, version="latest") + storage = Minio(docker_client, cache_client, resources, False) + storage.config = config + storage.start() + ``` +""" \ No newline at end of file diff --git a/sebs/storage/config.py b/sebs/storage/config.py index cd47df39..a5124e4a 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -1,25 +1,79 @@ -from abc import ABC -from abc import abstractmethod -from typing import List +"""Configuration classes for storage backends in the Serverless Benchmarking Suite. +This module provides dataclass-based configuration objects for different storage +backends supported by SeBS. It includes abstract base classes that define the +interface for storage configurations, as well as concrete implementations for +specific storage systems. + +Key Classes: + PersistentStorageConfig: Abstract base for object storage configurations + MinioConfig: Configuration for MinIO S3-compatible object storage + NoSQLStorageConfig: Abstract base for NoSQL database configurations + ScyllaDBConfig: Configuration for ScyllaDB DynamoDB-compatible storage + +All configuration classes support serialization/deserialization for caching +and provide environment variable mappings for runtime configuration. +""" + +from abc import ABC, abstractmethod from dataclasses import dataclass, field +from typing import Dict, List from sebs.cache import Cache @dataclass class PersistentStorageConfig(ABC): + """Abstract base class for persistent object storage configuration. + + This class defines the interface that all object storage configurations + must implement. It provides methods for serialization and environment + variable generation that are used for caching and runtime configuration. 
+ + Subclasses must implement: + - serialize(): Convert configuration to dictionary for caching + - envs(): Generate environment variables for benchmark runtime + """ + @abstractmethod - def serialize(self) -> dict: + def serialize(self) -> Dict[str, any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, any]: Serialized configuration data suitable for JSON storage + """ pass @abstractmethod - def envs(self) -> dict: + def envs(self) -> Dict[str, str]: + """Generate environment variables for the storage configuration. + + Returns: + Dict[str, str]: Environment variables to be set in benchmark runtime + """ pass @dataclass class MinioConfig(PersistentStorageConfig): + """Configuration for MinIO S3-compatible object storage. + + MinIO provides a local S3-compatible object storage service that runs in + a Docker container. This configuration class stores all the necessary + parameters for deploying and connecting to a MinIO instance. + + Attributes: + address: Network address where MinIO is accessible (auto-detected) + mapped_port: Host port mapped to MinIO's internal port 9000 + access_key: Access key for MinIO authentication (auto-generated) + secret_key: Secret key for MinIO authentication (auto-generated) + instance_id: Docker container ID of the running MinIO instance + output_buckets: List of bucket names used for benchmark output + input_buckets: List of bucket names used for benchmark input + version: MinIO Docker image version to use + data_volume: Host directory path for persistent data storage + type: Storage type identifier (always "minio") + """ address: str = "" mapped_port: int = -1 access_key: str = "" @@ -31,16 +85,36 @@ class MinioConfig(PersistentStorageConfig): data_volume: str = "" type: str = "minio" - def update_cache(self, path: List[str], cache: Cache): - + def update_cache(self, path: List[str], cache: Cache) -> None: + """Update the cache with this configuration's values. + + Stores all configuration fields in the cache using the specified path + as a prefix. This allows the configuration to be restored later from + the cache. + + Args: + path: Cache key path prefix for this configuration + cache: Cache instance to store configuration in + """ for key in MinioConfig.__dataclass_fields__.keys(): if key == "resources": continue cache.update_config(val=getattr(self, key), keys=[*path, key]) - # self.resources.update_cache(cache) @staticmethod - def deserialize(data: dict) -> "MinioConfig": + def deserialize(data: Dict[str, any]) -> "MinioConfig": + """Deserialize configuration from a dictionary. + + Creates a new MinioConfig instance from dictionary data, typically + loaded from cache or configuration files. Only known configuration + fields are used, unknown fields are ignored. + + Args: + data: Dictionary containing configuration data + + Returns: + MinioConfig: New configuration instance + """ keys = list(MinioConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} @@ -48,10 +122,23 @@ def deserialize(data: dict) -> "MinioConfig": return cfg - def serialize(self) -> dict: + def serialize(self) -> Dict[str, any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, any]: All configuration fields as a dictionary + """ return self.__dict__ - def envs(self) -> dict: + def envs(self) -> Dict[str, str]: + """Generate environment variables for MinIO configuration. + + Creates environment variables that can be used by benchmark functions + to connect to the MinIO storage instance. 
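As a rough illustration of how a benchmark could consume these variables, the sketch below opens a MinIO client from a dictionary shaped like the output of envs(); the address, the credentials, and the MINIO_SECRET_KEY key name are placeholders/assumptions, and the `minio` package must be installed.

```python
import minio

# Placeholder values shaped like the dictionary returned by MinioConfig.envs().
envs = {
    "MINIO_ADDRESS": "172.17.0.2:9000",
    "MINIO_ACCESS_KEY": "example-access-key",
    "MINIO_SECRET_KEY": "example-secret-key",  # assumed key name
}

# Local container, so plain HTTP without TLS.
client = minio.Minio(
    envs["MINIO_ADDRESS"],
    access_key=envs["MINIO_ACCESS_KEY"],
    secret_key=envs["MINIO_SECRET_KEY"],
    secure=False,
)

# Requires a running MinIO instance at the configured address.
print([bucket.name for bucket in client.list_buckets()])
```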
+ + Returns: + Dict[str, str]: Environment variables for MinIO connection + """ return { "MINIO_ADDRESS": self.address, "MINIO_ACCESS_KEY": self.access_key, @@ -61,13 +148,47 @@ def envs(self) -> dict: @dataclass class NoSQLStorageConfig(ABC): + """Abstract base class for NoSQL database storage configuration. + + This class defines the interface that all NoSQL storage configurations + must implement. It provides serialization methods used for caching + and configuration management. + + Subclasses must implement: + - serialize(): Convert configuration to dictionary for caching + """ + @abstractmethod - def serialize(self) -> dict: + def serialize(self) -> Dict[str, any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, any]: Serialized configuration data suitable for JSON storage + """ pass @dataclass class ScyllaDBConfig(NoSQLStorageConfig): + """Configuration for ScyllaDB DynamoDB-compatible NoSQL storage. + + ScyllaDB provides a high-performance NoSQL database with DynamoDB-compatible + API through its Alternator interface. This configuration class stores all + the necessary parameters for deploying and connecting to a ScyllaDB instance. + + Attributes: + address: Network address where ScyllaDB is accessible (auto-detected) + mapped_port: Host port mapped to ScyllaDB's Alternator port + alternator_port: Internal port for DynamoDB-compatible API (default: 8000) + access_key: Access key for DynamoDB API (placeholder value) + secret_key: Secret key for DynamoDB API (placeholder value) + instance_id: Docker container ID of the running ScyllaDB instance + region: AWS region placeholder (not used for local deployment) + cpus: Number of CPU cores allocated to ScyllaDB container + memory: Memory allocation in MB for ScyllaDB container + version: ScyllaDB Docker image version to use + data_volume: Host directory path for persistent data storage + """ address: str = "" mapped_port: int = -1 alternator_port: int = 8000 @@ -80,13 +201,34 @@ class ScyllaDBConfig(NoSQLStorageConfig): version: str = "" data_volume: str = "" - def update_cache(self, path: List[str], cache: Cache): - + def update_cache(self, path: List[str], cache: Cache) -> None: + """Update the cache with this configuration's values. + + Stores all configuration fields in the cache using the specified path + as a prefix. This allows the configuration to be restored later from + the cache. + + Args: + path: Cache key path prefix for this configuration + cache: Cache instance to store configuration in + """ for key in ScyllaDBConfig.__dataclass_fields__.keys(): cache.update_config(val=getattr(self, key), keys=[*path, key]) @staticmethod - def deserialize(data: dict) -> "ScyllaDBConfig": + def deserialize(data: Dict[str, any]) -> "ScyllaDBConfig": + """Deserialize configuration from a dictionary. + + Creates a new ScyllaDBConfig instance from dictionary data, typically + loaded from cache or configuration files. Only known configuration + fields are used, unknown fields are ignored. + + Args: + data: Dictionary containing configuration data + + Returns: + ScyllaDBConfig: New configuration instance + """ keys = list(ScyllaDBConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} @@ -94,5 +236,10 @@ def deserialize(data: dict) -> "ScyllaDBConfig": return cfg - def serialize(self) -> dict: + def serialize(self) -> Dict[str, any]: + """Serialize the configuration to a dictionary. 
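A quick sketch of the serialize/deserialize round trip supported by these dataclasses, assuming the sebs package is importable; the field values are chosen only for illustration.

```python
from sebs.storage.config import ScyllaDBConfig

# Build a configuration with illustrative values; remaining fields keep defaults.
cfg = ScyllaDBConfig(mapped_port=8000, cpus=1, memory=1024, version="5.2")

# serialize() exposes the dataclass fields as a plain dictionary...
data = cfg.serialize()

# ...and deserialize() rebuilds an equivalent configuration, ignoring unknown keys.
restored = ScyllaDBConfig.deserialize({**data, "unknown_field": "dropped"})
print(restored == cfg)  # expected: True
```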
+ + Returns: + Dict[str, any]: All configuration fields as a dictionary + """ return self.__dict__ diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index db9b1f9e..76b2413f 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -12,7 +12,7 @@ import os import secrets import uuid -from typing import List, Optional, Type, TypeVar +from typing import Any, Dict, List, Optional, Type, TypeVar import docker import minio @@ -63,7 +63,7 @@ def deployment_name() -> str: def __init__( self, - docker_client: docker.client, + docker_client: docker.DockerClient, cache_client: Cache, resources: Resources, replace_existing: bool, @@ -78,8 +78,8 @@ def __init__( replace_existing: Whether to replace existing buckets """ super().__init__(self.MINIO_REGION, cache_client, resources, replace_existing) - self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None + self._docker_client: docker.DockerClient = docker_client + self._storage_container: Optional[docker.models.containers.Container] = None self._cfg = MinioConfig() @property @@ -103,7 +103,7 @@ def config(self, config: MinioConfig): self._cfg = config @staticmethod - def _define_http_client(): + def _define_http_client() -> Any: """ Configure HTTP client for MinIO with appropriate timeouts and retries. @@ -127,7 +127,7 @@ def _define_http_client(): ), ) - def start(self): + def start(self) -> None: """ Start a MinIO storage container. @@ -189,7 +189,7 @@ def start(self): self.logging.error("Starting Minio storage failed! Unknown error: {}".format(e)) raise RuntimeError("Starting Minio storage unsuccesful") - def configure_connection(self): + def configure_connection(self) -> None: """ Configure the connection to the MinIO container. @@ -228,17 +228,17 @@ def configure_connection(self): if not self._cfg.address: self.logging.error( f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" + f"{json.dumps(self._storage_container.attrs, indent=2)}" ) raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" + f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" ) self.logging.info("Starting minio instance at {}".format(self._cfg.address)) # Create the connection using the configured address self.connection = self.get_connection() - def stop(self): + def stop(self) -> None: """ Stop the MinIO container. @@ -252,7 +252,7 @@ def stop(self): else: self.logging.error("Stopping minio was not successful, storage container not known!") - def get_connection(self): + def get_connection(self) -> minio.Minio: """ Create a new MinIO client connection. @@ -270,7 +270,7 @@ def get_connection(self): http_client=Minio._define_http_client(), ) - def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False): + def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False) -> str: """ Create a new bucket if it doesn't already exist. @@ -315,7 +315,7 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo # Rethrow the error for handling by the caller raise err - def uploader_func(self, path_idx, file, filepath): + def uploader_func(self, path_idx: int, file: str, filepath: str) -> None: """ Upload a file to the MinIO storage. 
@@ -338,7 +338,7 @@ def uploader_func(self, path_idx, file, filepath): self.logging.error("Upload failed!") raise err - def clean(self): + def clean(self) -> None: """ Clean all objects from output buckets. @@ -351,7 +351,7 @@ def clean(self): for err in self.connection.remove_objects(bucket, objects): self.logging.error("Deletion Error: {}".format(err)) - def download_results(self, result_dir): + def download_results(self, result_dir: str) -> None: """ Download all objects from output buckets to a local directory. @@ -368,7 +368,7 @@ def download_results(self, result_dir): for obj in objects: self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj)) - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: """ Remove all objects from a bucket. @@ -386,7 +386,7 @@ def clean_bucket(self, bucket: str): for error in errors: self.logging.error(f"Error when deleting object from bucket {bucket}: {error}!") - def remove_bucket(self, bucket: str): + def remove_bucket(self, bucket: str) -> None: """ Delete a bucket completely. @@ -413,7 +413,7 @@ def correct_name(self, name: str) -> str: """ return name - def download(self, bucket_name: str, key: str, filepath: str): + def download(self, bucket_name: str, key: str, filepath: str) -> None: """ Download an object from a bucket to a local file. @@ -472,7 +472,7 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: else: return [bucket.name for bucket in buckets] - def upload(self, bucket_name: str, filepath: str, key: str): + def upload(self, bucket_name: str, filepath: str, key: str) -> None: """ Upload a file to a bucket. @@ -483,7 +483,7 @@ def upload(self, bucket_name: str, filepath: str, key: str): """ raise NotImplementedError() - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: """ Serialize MinIO configuration to a dictionary. diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index a85e725e..d4b4b3fd 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -1,4 +1,24 @@ -from typing import cast, Optional, Tuple +"""Resource management for self-hosted storage deployments in SeBS. + +This module provides resource management classes for self-hosted storage +deployments, including both object storage (MinIO) and NoSQL storage (ScyllaDB). +It handles configuration deserialization, container lifecycle management, and +provides unified interfaces for accessing storage services. + +Key Classes: + SelfHostedResources: Configuration management for self-hosted storage resources + SelfHostedSystemResources: System-level resource management and service provisioning + +The module supports: + - MinIO for S3-compatible object storage + - ScyllaDB for DynamoDB-compatible NoSQL storage + - Configuration caching and deserialization + - Docker container lifecycle management + - Dynamic service discovery and connection configuration +""" + +import docker +from typing import cast, Dict, Optional, Tuple from sebs.cache import Cache from sebs.faas.config import Config, Resources @@ -15,30 +35,62 @@ ) from sebs.utils import LoggingHandlers -import docker - class SelfHostedResources(Resources): + """Resource configuration for self-hosted storage deployments. + + This class manages configuration for self-hosted storage services, + including object storage (MinIO) and NoSQL storage (ScyllaDB). It provides + serialization, caching, and deserialization capabilities for storage + configurations. 
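A small, hedged sketch of combining the two optional backends and serializing them through this class; the deployment name and configuration values are placeholders, and the sebs package is assumed to be importable.

```python
from sebs.storage.config import MinioConfig, ScyllaDBConfig
from sebs.storage.resources import SelfHostedResources

resources = SelfHostedResources(
    name="local",  # placeholder deployment name
    storage_cfg=MinioConfig(mapped_port=9000, version="latest"),
    nosql_storage_cfg=ScyllaDBConfig(mapped_port=8000, version="5.2"),
)

# Each configured backend contributes its own section to the serialized output;
# sections are omitted when the corresponding configuration is None.
print(resources.serialize())
```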
+ + Attributes: + _object_storage: Configuration for object storage (MinIO) + _nosql_storage: Configuration for NoSQL storage (ScyllaDB) + """ + def __init__( self, name: str, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): + """Initialize self-hosted resources configuration. + + Args: + name: Name of the deployment/resource group + storage_cfg: Configuration for object storage service + nosql_storage_cfg: Configuration for NoSQL storage service + """ super().__init__(name=name) self._object_storage = storage_cfg self._nosql_storage = nosql_storage_cfg @property def storage_config(self) -> Optional[PersistentStorageConfig]: + """Get the object storage configuration. + + Returns: + Optional[PersistentStorageConfig]: Object storage configuration or None + """ return self._object_storage @property def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: + """Get the NoSQL storage configuration. + + Returns: + Optional[NoSQLStorageConfig]: NoSQL storage configuration or None + """ return self._nosql_storage - def serialize(self) -> dict: - out: dict = {} + def serialize(self) -> Dict[str, any]: + """Serialize the resource configuration to a dictionary. + + Returns: + Dict[str, any]: Serialized configuration containing storage and/or nosql sections + """ + out: Dict[str, any] = {} if self._object_storage is not None: out = {**out, "storage": self._object_storage.serialize()} @@ -48,7 +100,15 @@ def serialize(self) -> dict: return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the configuration cache with current resource settings. + + Stores both object storage and NoSQL storage configurations in the + cache for later retrieval. + + Args: + cache: Cache instance to store configurations in + """ super().update_cache(cache) if self._object_storage is not None: cast(MinioConfig, self._object_storage).update_cache( @@ -60,10 +120,23 @@ def update_cache(self, cache: Cache): ) def _deserialize_storage( - self, config: dict, cached_config: Optional[dict], storage_type: str - ) -> Tuple[str, dict]: + self, config: Dict[str, any], cached_config: Optional[Dict[str, any]], storage_type: str + ) -> Tuple[str, Dict[str, any]]: + """Deserialize storage configuration from config or cache. + + Attempts to load storage configuration from the provided config first, + then falls back to cached configuration if available. + + Args: + config: Current configuration dictionary + cached_config: Previously cached configuration dictionary + storage_type: Type of storage to deserialize ('object' or 'nosql') + + Returns: + Tuple[str, Dict[str, any]]: Storage implementation name and configuration + """ storage_impl = "" - storage_config = {} + storage_config: Dict[str, any] = {} # Check for new config if "storage" in config and storage_type in config["storage"]: @@ -91,7 +164,17 @@ def _deserialize_storage( return storage_impl, storage_config @staticmethod - def _deserialize(ret: "SelfHostedResources", config: dict, cached_config: dict): + def _deserialize(ret: "SelfHostedResources", config: Dict[str, any], cached_config: Dict[str, any]) -> None: + """Deserialize storage configurations from config and cache data. + + Populates the SelfHostedResources instance with storage configurations + loaded from the provided configuration and cached data. 
+ + Args: + ret: SelfHostedResources instance to populate + config: Current configuration dictionary + cached_config: Previously cached configuration dictionary + """ obj_storage_impl, obj_storage_cfg = ret._deserialize_storage( config, cached_config, "object" ) @@ -118,6 +201,20 @@ def _deserialize(ret: "SelfHostedResources", config: dict, cached_config: dict): class SelfHostedSystemResources(SystemResources): + """System-level resource management for self-hosted storage deployments. + + This class manages the lifecycle and provisioning of self-hosted storage + services, including MinIO object storage and ScyllaDB NoSQL storage. It + handles container management, service initialization, and provides unified + access to storage services. + + Attributes: + _name: Name of the deployment + _logging_handlers: Logging configuration handlers + _storage: Active persistent storage instance (MinIO) + _nosql_storage: Active NoSQL storage instance (ScyllaDB) + """ + def __init__( self, name: str, @@ -126,6 +223,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """Initialize system resources for self-hosted storage. + + Args: + name: Name of the deployment + config: SeBS configuration object + cache_client: Cache client for configuration persistence + docker_client: Docker client for container management + logger_handlers: Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._name = name @@ -133,17 +239,23 @@ def __init__( self._storage: Optional[PersistentStorage] = None self._nosql_storage: Optional[NoSQLStorage] = None - """ - Create wrapper object for minio storage and fill buckets. - Starts minio as a Docker instance, using always fresh buckets. - - :param benchmark: - :param buckets: number of input and output buckets - :param replace_existing: not used. - :return: Azure storage instance - """ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """Get or create a persistent storage instance. + + Creates a MinIO storage instance if one doesn't exist, or returns the + existing instance. The storage is configured using the deployment's + storage configuration. + + Args: + replace_existing: Whether to replace existing buckets (optional) + + Returns: + PersistentStorage: MinIO storage instance + + Raises: + RuntimeError: If storage configuration is missing or unsupported + """ if self._storage is None: storage_config = cast(SelfHostedResources, self._config.resources).storage_config if storage_config is None: @@ -172,6 +284,18 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """Get or create a NoSQL storage instance. + + Creates a ScyllaDB storage instance if one doesn't exist, or returns the + existing instance. The storage is configured using the deployment's + NoSQL storage configuration. + + Returns: + NoSQLStorage: ScyllaDB storage instance + + Raises: + RuntimeError: If NoSQL storage configuration is missing or unsupported + """ if self._nosql_storage is None: storage_config = cast(SelfHostedResources, self._config.resources).nosql_storage_config if storage_config is None: diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index aae97815..4e760c84 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -1,9 +1,25 @@ +"""ScyllaDB NoSQL storage implementation for the Serverless Benchmarking Suite. 
+ +This module implements NoSQL database storage using ScyllaDB, which provides a +DynamoDB-compatible API through its Alternator interface. ScyllaDB runs in a +Docker container and provides high-performance NoSQL storage for benchmark data +that requires DynamoDB-compatible operations. + +The implementation uses boto3 with ScyllaDB's Alternator API to provide seamless +compatibility with DynamoDB operations while running locally for development +and testing purposes. +""" + import json import os import platform import time from collections import defaultdict -from typing import Dict, Optional, Tuple, Type, TypeVar +from typing import Any, Dict, Optional, Tuple, Type, TypeVar + +import boto3 +from boto3.dynamodb.types import TypeSerializer +import docker from sebs.cache import Cache from sebs.faas.config import Resources @@ -12,38 +28,72 @@ from sebs.storage.config import ScyllaDBConfig from sebs.utils import project_absolute_path -import boto3 -from boto3.dynamodb.types import TypeSerializer -import docker - class ScyllaDB(NoSQLStorage): + """ScyllaDB implementation for DynamoDB-compatible NoSQL storage. + + This class manages a ScyllaDB instance running in a Docker container, + providing DynamoDB-compatible NoSQL storage through ScyllaDB's Alternator + interface. It handles table creation, data operations, and container + lifecycle management. + + Attributes: + _docker_client: Docker client for container management + _storage_container: Docker container running ScyllaDB + _cfg: ScyllaDB configuration settings + _tables: Mapping of benchmark names to table mappings + _serializer: DynamoDB type serializer for data conversion + client: Boto3 DynamoDB client configured for ScyllaDB + """ + @staticmethod def typename() -> str: + """Get the qualified type name of this class. + + Returns: + str: Full type name including deployment name + """ return f"{ScyllaDB.deployment_name()}.ScyllaDB" @staticmethod def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + str: Deployment name ('scylladb') + """ return "scylladb" @property def config(self) -> ScyllaDBConfig: + """Get the ScyllaDB configuration. + + Returns: + ScyllaDBConfig: The configuration object + """ return self._cfg - # the location does not matter + # The region setting is required by DynamoDB API but not used for local ScyllaDB SCYLLADB_REGION = "None" def __init__( self, - docker_client: docker.client, + docker_client: docker.DockerClient, cache_client: Cache, config: ScyllaDBConfig, resources: Optional[Resources] = None, ): - + """Initialize a ScyllaDB storage instance. + + Args: + docker_client: Docker client for managing the ScyllaDB container + cache_client: Cache client for storing storage configuration + config: ScyllaDB configuration settings + resources: Resources configuration (optional) + """ super().__init__(self.SCYLLADB_REGION, cache_client, resources) # type: ignore self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None + self._storage_container: Optional[docker.models.containers.Container] = None self._cfg = config # Map benchmark -> orig_name -> table_name @@ -59,8 +109,20 @@ def __init__( endpoint_url=f"http://{config.address}", ) - def start(self): - + def start(self) -> None: + """Start a ScyllaDB storage container. + + Creates and runs a Docker container with ScyllaDB, configuring it with + the specified CPU and memory resources. The container runs in detached + mode and exposes the Alternator DynamoDB-compatible API on the configured port. 
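As described above, the running container exposes a local DynamoDB-compatible endpoint; a client can then be pointed at it with boto3, mirroring what the class itself does. In this sketch the address is a placeholder, and region/credentials are dummies, which the configuration itself treats as placeholder values.

```python
import boto3

# Placeholder address of a locally running ScyllaDB Alternator endpoint.
endpoint = "http://172.17.0.3:8000"

# Region and credentials are required by the DynamoDB API but not meaningful locally.
client = boto3.client(
    "dynamodb",
    region_name="None",
    aws_access_key_id="None",
    aws_secret_access_key="None",
    endpoint_url=endpoint,
)

print(client.list_tables()["TableNames"])
```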
+ + The method waits for ScyllaDB to fully initialize by checking the nodetool + status until the service is ready. + + Raises: + RuntimeError: If starting the ScyllaDB container fails or if ScyllaDB + fails to initialize within the timeout period + """ if self._cfg.data_volume == "": scylladb_volume = os.path.join(project_absolute_path(), "scylladb-volume") else: @@ -76,7 +138,6 @@ def start(self): } try: - scylladb_args = "" scylladb_args += f"--smp {self._cfg.cpus} " scylladb_args += f"--memory {self._cfg.memory}M " @@ -104,34 +165,43 @@ def start(self): attempts = 0 max_attempts = 30 while attempts < max_attempts: - exit_code, out = self._storage_container.exec_run("nodetool status") if exit_code == 0: - self.logging.info("Started ScyllaDB succesfully!") + self.logging.info("Started ScyllaDB successfully!") break time.sleep(1.0) attempts += 1 if attempts == max_attempts: - self.logging.error("Failed to launch ScyllaBD!") + self.logging.error("Failed to launch ScyllaDB!") self.logging.error(f"Last result of nodetool status: {out}") - raise RuntimeError("Failed to launch ScyllaBD!") + raise RuntimeError("Failed to launch ScyllaDB!") self.configure_connection() except docker.errors.APIError as e: self.logging.error("Starting ScyllaDB storage failed! Reason: {}".format(e)) - raise RuntimeError("Starting ScyllaDB storage unsuccesful") + raise RuntimeError("Starting ScyllaDB storage unsuccessful") except Exception as e: self.logging.error("Starting ScyllaDB storage failed! Unknown error: {}".format(e)) - raise RuntimeError("Starting ScyllaDB storage unsuccesful") - - # FIXME: refactor this - duplicated code from minio - def configure_connection(self): - # who knows why? otherwise attributes are not loaded + raise RuntimeError("Starting ScyllaDB storage unsuccessful") + + def configure_connection(self) -> None: + """Configure the connection to the ScyllaDB container. + + Determines the appropriate address to connect to the ScyllaDB container + based on the host platform. For Linux, it uses the container's IP address, + while for Windows, macOS, or WSL it uses localhost with the mapped port. + + Creates a boto3 DynamoDB client configured to connect to ScyllaDB's + Alternator interface. + + Raises: + RuntimeError: If the ScyllaDB container is not available or if the IP address + cannot be detected + """ if self._cfg.address == "": - if self._storage_container is None: raise RuntimeError( "ScyllaDB container is not available! Make sure that you deployed " @@ -153,34 +223,60 @@ def configure_connection(self): if not self._cfg.address: self.logging.error( f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" + f"{json.dumps(self._storage_container.attrs, indent=2)}" ) raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" + f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" ) self.logging.info("Starting ScyllaDB instance at {}".format(self._cfg.address)) + + # Create the DynamoDB client for ScyllaDB's Alternator interface + self.client = boto3.client( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{self._cfg.address}", + ) - def stop(self): + def stop(self) -> None: + """Stop the ScyllaDB container. + + Gracefully stops the running ScyllaDB container if it exists. + Logs an error if the container is not known. 
+ """ if self._storage_container is not None: self.logging.info(f"Stopping ScyllaDB container at {self._cfg.address}.") self._storage_container.stop() self.logging.info(f"Stopped ScyllaDB container at {self._cfg.address}.") else: - self.logging.error("Stopping ScyllaDB was not succesful, storage container not known!") - - def envs(self) -> dict: + self.logging.error("Stopping ScyllaDB was not successful, storage container not known!") + + def envs(self) -> Dict[str, str]: + """Generate environment variables for ScyllaDB configuration. + + Creates environment variables that can be used by benchmark functions + to connect to the ScyllaDB storage instance. + + Returns: + Dict[str, str]: Environment variables for ScyllaDB connection + """ return {"NOSQL_STORAGE_TYPE": "scylladb", "NOSQL_STORAGE_ENDPOINT": self._cfg.address} - def serialize(self) -> Tuple[StorageType, dict]: + def serialize(self) -> Tuple[StorageType, Dict[str, Any]]: + """Serialize ScyllaDB configuration to a tuple. + + Returns: + Tuple[StorageType, Dict[str, Any]]: Storage type and serialized configuration + """ return StorageType.SCYLLADB, self._cfg.serialize() - """ - This implementation supports overriding this class. - The main ScyllaDB class is used to start/stop deployments. - - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. - """ + # Deserialization and inheritance support + # + # This implementation supports overriding this class. The main ScyllaDB class + # is used to start/stop deployments. When overriding the implementation in + # Local/OpenWhisk/..., we call the _deserialize method and provide an + # alternative implementation type. T = TypeVar("T", bound="ScyllaDB") @@ -188,6 +284,24 @@ def serialize(self) -> Tuple[StorageType, dict]: def _deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources, obj_type: Type[T] ) -> T: + """Deserialize a ScyllaDB instance from cached configuration with custom type. + + Creates a new instance of the specified class type from cached configuration + data. This allows platform-specific versions to be deserialized correctly + while sharing the core implementation. + + Args: + cached_config: Cached ScyllaDB configuration + cache_client: Cache client + resources: Resources configuration + obj_type: Type of object to create (a ScyllaDB subclass) + + Returns: + T: Deserialized instance of the specified type + + Raises: + RuntimeError: If the storage container does not exist + """ docker_client = docker.from_env() obj = obj_type(docker_client, cache_client, cached_config, resources) @@ -205,10 +319,32 @@ def _deserialize( def deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources ) -> "ScyllaDB": + """Deserialize a ScyllaDB instance from cached configuration. + + Creates a new ScyllaDB instance from cached configuration data. + + Args: + cached_config: Cached ScyllaDB configuration + cache_client: Cache client + resources: Resources configuration + + Returns: + ScyllaDB: Deserialized ScyllaDB instance + """ return ScyllaDB._deserialize(cached_config, cache_client, resources, ScyllaDB) def retrieve_cache(self, benchmark: str) -> bool: - + """Retrieve cached table configuration for a benchmark. + + Checks if table configuration for the given benchmark is already loaded + in memory, and if not, attempts to load it from the cache. 
+ + Args: + benchmark: Name of the benchmark + + Returns: + bool: True if table configuration was found, False otherwise + """ if benchmark in self._tables: return True @@ -219,8 +355,15 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): - + def update_cache(self, benchmark: str) -> None: + """Update the cache with table configuration for a benchmark. + + Stores the table configuration for the specified benchmark in the cache + for future retrieval. + + Args: + benchmark: Name of the benchmark + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, @@ -230,10 +373,26 @@ def update_cache(self, benchmark: str): ) def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get the table name mappings for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Mapping from original table names to actual table names + """ return self._tables[benchmark] def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: - + """Get the actual table name for a benchmark's logical table name. + + Args: + benchmark: Name of the benchmark + table: Logical table name + + Returns: + Optional[str]: Actual table name or None if not found + """ if benchmark not in self._tables: return None @@ -246,11 +405,25 @@ def write_to_table( self, benchmark: str, table: str, - data: dict, + data: Dict[str, Any], primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): - + ) -> None: + """Write data to a DynamoDB table in ScyllaDB. + + Serializes the data using DynamoDB type serialization and writes it + to the specified table with the provided primary and optional secondary keys. + + Args: + benchmark: Name of the benchmark + table: Logical table name + data: Data to write to the table + primary_key: Tuple of (key_name, key_value) for the primary key + secondary_key: Optional tuple of (key_name, key_value) for the secondary key + + Raises: + AssertionError: If the table name is not found + """ table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -261,21 +434,34 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: - + """Create a DynamoDB table in ScyllaDB. + + Creates a new DynamoDB table with the specified primary key and optional + secondary key. The table name is constructed to be unique across benchmarks + and resource groups. + + Note: Unlike cloud providers with hierarchical database structures, + ScyllaDB requires unique table names at the cluster level. 
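To illustrate the naming scheme and the underlying table-creation call, here is a hedged sketch; the resource ID, benchmark name, key name, and billing settings are placeholders, and the parameters SeBS actually passes may differ.

```python
import boto3

client = boto3.client(
    "dynamodb",
    region_name="None",
    aws_access_key_id="None",
    aws_secret_access_key="None",
    endpoint_url="http://127.0.0.1:8000",  # placeholder Alternator endpoint
)

# Cluster-wide unique name composed of resource ID, benchmark, and logical table name.
table_name = "sebs-benchmarks-{}-{}-{}".format("abc123", "130.crud-api", "results")

client.create_table(
    TableName=table_name,
    AttributeDefinitions=[{"AttributeName": "request_id", "AttributeType": "S"}],
    KeySchema=[{"AttributeName": "request_id", "KeyType": "HASH"}],
    BillingMode="PAY_PER_REQUEST",
)
```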
+ + Args: + benchmark: Name of the benchmark + name: Logical table name + primary_key: Name of the primary key attribute + secondary_key: Optional name of the secondary key attribute + + Returns: + str: The actual table name that was created + + Raises: + RuntimeError: If table creation fails for unknown reasons + """ table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" try: - definitions = [{"AttributeName": primary_key, "AttributeType": "S"}] key_schema = [{"AttributeName": primary_key, "KeyType": "HASH"}] @@ -301,7 +487,6 @@ def create_table( return ret["TableDescription"]["TableName"] except self.client.exceptions.ResourceInUseException as e: - if "already exists" in e.response["Error"]["Message"]: self.logging.info( f"Using existing DynamoDB table {table_name} for benchmark {benchmark}" @@ -312,7 +497,29 @@ def create_table( raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}") def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + str: Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + str: Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() From cc6d763466c066b47160efe754a60828924fb681 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 12:51:08 +0200 Subject: [PATCH 07/21] [system] Linting of docstrings --- sebs/aws/__init__.py | 3 +- sebs/aws/aws.py | 150 ++++++++++--------- sebs/aws/config.py | 172 +++++++++++----------- sebs/aws/container.py | 30 ++-- sebs/aws/dynamodb.py | 50 +++---- sebs/aws/function.py | 22 +-- sebs/aws/resources.py | 28 ++-- sebs/aws/s3.py | 54 +++---- sebs/aws/triggers.py | 68 ++++----- sebs/azure/azure.py | 106 ++++++------- sebs/azure/blob_storage.py | 60 ++++---- sebs/azure/cli.py | 38 ++--- sebs/azure/cloud_resources.py | 8 +- sebs/azure/config.py | 188 ++++++++++++------------ sebs/azure/cosmosdb.py | 10 +- sebs/azure/function.py | 18 +-- sebs/azure/triggers.py | 46 +++--- sebs/benchmark.py | 98 ++++++------ sebs/cache.py | 107 +++++++------- sebs/config.py | 68 ++++----- sebs/experiments/config.py | 40 ++--- sebs/experiments/environment.py | 39 ++--- sebs/experiments/eviction_model.py | 60 ++++---- sebs/experiments/experiment.py | 18 +-- sebs/experiments/invocation_overhead.py | 55 +++---- sebs/experiments/network_ping_pong.py | 30 ++-- sebs/experiments/perf_cost.py | 52 +++---- sebs/experiments/result.py | 40 ++--- sebs/experiments/startup_time.py | 27 ++-- sebs/faas/function.py | 185 +++++++++++------------ sebs/faas/nosql.py | 48 +++--- sebs/faas/storage.py | 10 +- sebs/faas/system.py | 103 ++++++------- sebs/gcp/cli.py | 27 ++-- sebs/gcp/config.py | 83 +++++------ sebs/gcp/datastore.py | 65 ++++---- sebs/gcp/function.py | 25 ++-- sebs/gcp/gcp.py | 144 +++++++++--------- sebs/gcp/resources.py | 31 ++-- sebs/gcp/storage.py | 49 +++--- sebs/gcp/triggers.py | 58 ++++---- sebs/local/config.py | 58 ++++---- sebs/local/deployment.py | 28 ++-- sebs/local/function.py | 56 +++---- sebs/local/local.py | 96 ++++++------ sebs/local/measureMem.py | 27 +--- sebs/openwhisk/config.py | 62 ++++---- sebs/openwhisk/container.py | 22 +-- sebs/openwhisk/function.py | 38 ++--- sebs/openwhisk/openwhisk.py | 89 +++++------ sebs/openwhisk/triggers.py | 68 ++++----- 
sebs/statistics.py | 16 +- sebs/storage/__init__.py | 2 +- sebs/storage/config.py | 78 +++++----- sebs/storage/minio.py | 187 +++++++++++------------ sebs/storage/resources.py | 55 +++---- sebs/storage/scylladb.py | 97 ++++++------ sebs/types.py | 20 ++- sebs/utils.py | 98 ++++++------ 59 files changed, 1832 insertions(+), 1778 deletions(-) diff --git a/sebs/aws/__init__.py b/sebs/aws/__init__.py index 236041cf..3f1bfe4b 100644 --- a/sebs/aws/__init__.py +++ b/sebs/aws/__init__.py @@ -9,7 +9,8 @@ AWS: Main AWS system implementation LambdaFunction: AWS Lambda function representation AWSConfig: AWS-specific configuration management - S3: S3 storage implementation + S3: Object storage implementation for S3 + DynamoDB: Key-value store implementation for DynamoDB The module handles AWS-specific functionality including: - Lambda function deployment and management diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 4e2aaf36..24f68d2e 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -36,15 +36,16 @@ class AWS(System): """ AWS Lambda implementation of the System interface. - + This class implements the FaaS System interface for AWS Lambda, providing methods for deploying, invoking, and managing Lambda functions. - + Attributes: logs_client: AWS CloudWatch Logs client cached: Whether AWS resources have been cached _config: AWS-specific configuration """ + logs_client = None cached = False _config: AWSConfig @@ -53,7 +54,7 @@ class AWS(System): def name() -> str: """ Get the name of this system. - + Returns: str: System name ('aws') """ @@ -63,7 +64,7 @@ def name() -> str: def typename() -> str: """ Get the type name of this system. - + Returns: str: Type name ('AWS') """ @@ -73,7 +74,7 @@ def typename() -> str: def function_type() -> "Type[Function]": """ Get the function type for this system. - + Returns: Type[Function]: LambdaFunction class """ @@ -83,7 +84,7 @@ def function_type() -> "Type[Function]": def config(self) -> AWSConfig: """ Get the AWS-specific configuration. - + Returns: AWSConfig: AWS configuration """ @@ -93,7 +94,7 @@ def config(self) -> AWSConfig: def system_resources(self) -> AWSSystemResources: """ Get the AWS system resources manager. - + Returns: AWSSystemResources: AWS resource manager """ @@ -109,7 +110,7 @@ def __init__( ): """ Initialize the AWS system. - + Args: sebs_config: SeBs system configuration config: AWS-specific configuration @@ -131,10 +132,10 @@ def __init__( def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): """ Initialize AWS resources. - + Creates a boto3 session, initializes Lambda client, and prepares system resources and ECR client. - + Args: config: Additional configuration parameters resource_prefix: Optional prefix for resource names @@ -155,7 +156,7 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] def get_lambda_client(self): """ Get or create an AWS Lambda client. - + Returns: boto3.client: Lambda client """ @@ -178,27 +179,34 @@ def package_code( ) -> Tuple[str, int, str]: """ Package code for deployment to AWS Lambda. - + Creates a suitable deployment package with the following structure: - + function/ - function.py - storage.py - resources/ handler.py - + + It would be sufficient to just pack the code and ship it as zip to AWS. + However, to have a compatible function implementation across providers, + we create a small module. + Issue: relative imports in Python when using storage wrapper. 
+ Azure expects a relative import inside a module thus it's easier + to always create a module. + For container deployments, builds a Docker image and pushes it to ECR. For ZIP deployments, creates a ZIP package compatible with Lambda. - + Args: directory: Path to the code directory - language_name: Programming language name (e.g., 'python', 'nodejs') + language_name: Programming language name (e.g., 'python', 'nodejs') language_version: Language version (e.g., '3.8', '14') architecture: Target CPU architecture (e.g., 'x64', 'arm64') benchmark: Benchmark name is_cached: Whether code is already cached container_deployment: Whether to use container deployment - + Returns: Tuple containing: - Path to the packaged code (ZIP file) @@ -212,7 +220,12 @@ def package_code( if container_deployment: # build base image and upload to ECR _, container_uri = self.ecr_client.build_base_image( - directory, language_name, language_version, architecture, benchmark, is_cached + directory, + language_name, + language_version, + architecture, + benchmark, + is_cached, ) CONFIG_FILES = { @@ -246,10 +259,10 @@ def package_code( def _map_architecture(self, architecture: str) -> str: """ Map architecture name to AWS Lambda-compatible format. - + Args: architecture: Architecture name from SeBs (e.g., 'x64') - + Returns: str: AWS Lambda-compatible architecture name (e.g., 'x86_64') """ @@ -260,14 +273,14 @@ def _map_architecture(self, architecture: str) -> str: def _map_language_runtime(self, language: str, runtime: str) -> str: """ Map language runtime to AWS Lambda-compatible format. - + AWS uses different naming schemes for runtime versions. For example, Node.js uses '12.x' instead of '12'. - + Args: language: Language name (e.g., 'nodejs', 'python') runtime: Runtime version (e.g., '12', '3.8') - + Returns: str: AWS Lambda-compatible runtime version """ @@ -286,16 +299,16 @@ def create_function( ) -> "LambdaFunction": """ Create or update an AWS Lambda function. - + If the function already exists, it updates the code and configuration. Otherwise, it creates a new function with the specified parameters. - + Args: code_package: Benchmark code package func_name: Name of the function container_deployment: Whether to use container deployment container_uri: URI of the container image (if container_deployment=True) - + Returns: LambdaFunction: The created or updated Lambda function """ @@ -404,10 +417,10 @@ def create_function( def cached_function(self, function: Function) -> None: """Set up triggers for a cached function. - + Configures triggers for a function that was loaded from cache, ensuring they have proper logging handlers and deployment client references. - + Args: function: Function instance to configure triggers for """ @@ -429,11 +442,11 @@ def update_function( ): """ Update an existing AWS Lambda function. - + Updates the function code and waits for the update to complete. For container deployments, updates the container image. For ZIP deployments, uploads the code package directly or via S3. - + Args: function: The function to update code_package: Benchmark code package @@ -487,16 +500,16 @@ def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ) -> None: """Update Lambda function configuration. - + Updates the function's timeout, memory, and environment variables. Automatically adds environment variables for NoSQL storage table names if the benchmark uses NoSQL storage. 
- + Args: function: Function to update code_package: Benchmark code package with configuration env_variables: Additional environment variables to set - + Raises: AssertionError: If code package input has not been processed """ @@ -546,15 +559,15 @@ def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: """Generate default function name for a benchmark. - + Creates a standardized function name based on resource ID, benchmark name, language, version, and architecture. Ensures the name is compatible with AWS Lambda naming requirements. - + Args: code_package: Benchmark code package resources: Optional resources object (uses default if not provided) - + Returns: str: Formatted function name suitable for AWS Lambda """ @@ -574,13 +587,13 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: """Format function name for AWS Lambda compatibility. - + AWS Lambda has specific naming requirements. This method ensures the function name complies with AWS Lambda naming rules. - + Args: func_name: Raw function name - + Returns: str: Formatted function name with illegal characters replaced """ @@ -591,12 +604,12 @@ def format_function_name(func_name: str) -> str: def delete_function(self, func_name: Optional[str]) -> None: """Delete an AWS Lambda function. - + Args: func_name: Name of the function to delete - + Note: - FIXME: does not clean the cache + FIXME: does not clean the cache in SeBS. """ self.logging.debug("Deleting function {}".format(func_name)) try: @@ -609,19 +622,19 @@ def parse_aws_report( log: str, requests: Union[ExecutionResult, Dict[str, ExecutionResult]] ) -> str: """Parse AWS Lambda execution report from CloudWatch logs. - + Extracts execution metrics from AWS Lambda log entries and updates - the corresponding ExecutionResult objects with timing, memory, and - billing information. - + the corresponding ExecutionResult objects with timing, memory, + billing information, and init duration (when provided). + Args: log: Raw log string from CloudWatch or synchronous invocation - requests: Either a single ExecutionResult or dictionary mapping + requests: Either a single ExecutionResult or dictionary mapping request IDs to ExecutionResult objects - + Returns: str: Request ID of the parsed execution - + Example: The log format expected is tab-separated AWS Lambda report format: "REPORT RequestId: abc123\tDuration: 100.00 ms\tBilled Duration: 100 ms\t..." @@ -653,21 +666,24 @@ def parse_aws_report( def shutdown(self) -> None: """Shutdown the AWS system and clean up resources. - + Calls the parent shutdown method to perform standard cleanup. """ super().shutdown() def get_invocation_error(self, function_name: str, start_time: int, end_time: int) -> None: """Retrieve and log invocation errors from CloudWatch Logs. - + Queries CloudWatch Logs for error messages during the specified time range and logs them for debugging purposes. - + Args: function_name: Name of the Lambda function start_time: Start time for log query (Unix timestamp) end_time: End time for log query (Unix timestamp) + + Note: + It is unclear at the moment if this function is always working correctly. """ if not self.logs_client: self.logs_client = boto3.client( @@ -713,10 +729,10 @@ def download_metrics( metrics: dict, ) -> None: """Download execution metrics from CloudWatch Logs. - + Queries CloudWatch Logs for Lambda execution reports and parses them to extract performance metrics for each request. 
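As a small, self-contained sketch, the tab-separated REPORT entry shown earlier can be split into metric fields like this; the sample line is synthetic and covers only the commonly observed fields.

```python
# Synthetic example of an AWS Lambda report entry.
log = (
    "REPORT RequestId: abc123\tDuration: 100.00 ms\t"
    "Billed Duration: 100 ms\tMemory Size: 128 MB\tMax Memory Used: 41 MB"
)

metrics = {}
for field in log.split("\t"):
    if not field.strip():
        continue
    key, _, value = field.partition(": ")
    metrics[key.replace("REPORT ", "")] = value.strip()

print(metrics)
# {'RequestId': 'abc123', 'Duration': '100.00 ms', 'Billed Duration': '100 ms', ...}
```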
- + Args: function_name: Name of the Lambda function start_time: Start time for metrics collection (Unix timestamp) @@ -768,17 +784,17 @@ def download_metrics( def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: """Create a trigger for the specified function. - + Creates and configures a trigger based on the specified type. Currently supports HTTP triggers (via API Gateway) and library triggers. - + Args: func: Function to create trigger for trigger_type: Type of trigger to create (HTTP or LIBRARY) - + Returns: Trigger: The created trigger instance - + Raises: RuntimeError: If trigger type is not supported """ @@ -818,10 +834,10 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: """Enforce cold start for a single function. - + Updates the function's environment variables to force a cold start on the next invocation. - + Args: function: Function to enforce cold start for code_package: Benchmark code package with configuration @@ -833,10 +849,10 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> No def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: """Enforce cold start for multiple functions. - + Updates all specified functions to force cold starts on their next invocations. This is useful for ensuring consistent performance measurements. - + Args: functions: List of functions to enforce cold start for code_package: Benchmark code package with configuration @@ -852,10 +868,10 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) def wait_function_active(self, func: LambdaFunction) -> None: """Wait for Lambda function to become active after creation. - + Uses AWS Lambda waiter to wait until the function is in Active state and ready to be invoked. - + Args: func: Lambda function to wait for """ @@ -867,10 +883,10 @@ def wait_function_active(self, func: LambdaFunction) -> None: def wait_function_updated(self, func: LambdaFunction) -> None: """Wait for Lambda function to complete update process. - + Uses AWS Lambda waiter to wait until the function update is complete and the function is ready to be invoked with new configuration. - + Args: func: Lambda function to wait for """ @@ -882,7 +898,7 @@ def wait_function_updated(self, func: LambdaFunction) -> None: def disable_rich_output(self) -> None: """Disable rich output formatting for ECR operations. - + Disables colored/formatted output in the ECR container client, useful for CI/CD environments or when plain text output is preferred. """ diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 527c8bc9..ee28bdab 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -1,13 +1,13 @@ """Configuration management for AWS SeBS integration. This module provides configuration classes for AWS credentials, resources, and settings -used by the Serverless Benchmarking Suite when deploying to AWS Lambda. It handles +used when deploying to AWS Lambda. It handles AWS authentication, resource management including ECR repositories, IAM roles, and HTTP APIs, along with caching and serialization capabilities. 
Key classes: AWSCredentials: Manages AWS access credentials and account information - AWSResources: Manages AWS resources like ECR repositories, IAM roles, and HTTP APIs + AWSResources: Manages AWS resources like ECR repositories, IAM roles, and HTTP APIs AWSConfig: Main configuration container combining credentials and resources """ @@ -28,23 +28,26 @@ class AWSCredentials(Credentials): """AWS authentication credentials for SeBS. - + This class manages AWS access credentials including access key, secret key, and automatically retrieves the associated AWS account ID through STS. - + + Account ID is cached to retain information on which account was the benchmark + executed. Credentials are not cached. + Attributes: _access_key: AWS access key ID _secret_key: AWS secret access key _account_id: AWS account ID retrieved via STS """ - + def __init__(self, access_key: str, secret_key: str) -> None: """Initialize AWS credentials. - + Args: access_key: AWS access key ID secret_key: AWS secret access key - + Raises: ClientError: If AWS credentials are invalid or STS call fails """ @@ -54,14 +57,16 @@ def __init__(self, access_key: str, secret_key: str) -> None: self._secret_key = secret_key client = boto3.client( - "sts", aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key + "sts", + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, ) self._account_id = client.get_caller_identity()["Account"] @staticmethod def typename() -> str: """Get the type name for these credentials. - + Returns: str: The type name 'AWS.Credentials' """ @@ -70,7 +75,7 @@ def typename() -> str: @property def access_key(self) -> str: """Get the AWS access key ID. - + Returns: str: AWS access key ID """ @@ -79,7 +84,7 @@ def access_key(self) -> str: @property def secret_key(self) -> str: """Get the AWS secret access key. - + Returns: str: AWS secret access key """ @@ -88,7 +93,7 @@ def secret_key(self) -> str: @property def account_id(self) -> str: """Get the AWS account ID. - + Returns: str: AWS account ID """ @@ -97,13 +102,13 @@ def account_id(self) -> str: @staticmethod def initialize(dct: dict) -> "AWSCredentials": """Initialize AWS credentials from a dictionary. - + Args: dct: Dictionary containing 'access_key' and 'secret_key' - + Returns: AWSCredentials: Initialized credentials object - + Raises: KeyError: If required keys are missing from dictionary """ @@ -112,23 +117,21 @@ def initialize(dct: dict) -> "AWSCredentials": @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: """Deserialize AWS credentials from configuration and cache. - + Loads AWS credentials from configuration file, environment variables, or cache. Validates that credentials match cached account ID if available. - + Args: config: Configuration dictionary that may contain credentials cache: Cache instance for retrieving/storing credentials handlers: Logging handlers for error reporting - + Returns: Credentials: Deserialized AWSCredentials instance - + Raises: RuntimeError: If credentials are missing or don't match cached account """ - # FIXME: update return types of both functions to avoid cast - # needs 3.7+ to support annotations cached_config = cache.get_config("aws") ret: AWSCredentials account_id: Optional[str] = None @@ -166,7 +169,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden def update_cache(self, cache: Cache) -> None: """Update the cache with current credentials. 
- + Args: cache: Cache instance to update """ @@ -174,7 +177,7 @@ def update_cache(self, cache: Cache) -> None: def serialize(self) -> dict: """Serialize credentials to a dictionary. - + Returns: dict: Dictionary containing account_id """ @@ -184,11 +187,11 @@ def serialize(self) -> dict: class AWSResources(Resources): """AWS resource management for SeBS. - + This class manages AWS-specific resources including ECR repositories, IAM roles, HTTP APIs, and Docker registry configurations. It provides methods for creating and managing these resources with caching support. - + Attributes: _docker_registry: Docker registry URL (ECR repository URI) _docker_username: Docker registry username @@ -197,20 +200,20 @@ class AWSResources(Resources): _lambda_role: IAM role ARN for Lambda execution _http_apis: Dictionary of HTTP API configurations """ - + class HTTPApi: """HTTP API configuration for AWS API Gateway. - + Represents an HTTP API resource in AWS API Gateway with its ARN and endpoint. - + Attributes: _arn: API Gateway ARN _endpoint: API Gateway endpoint URL """ - + def __init__(self, arn: str, endpoint: str) -> None: """Initialize HTTP API configuration. - + Args: arn: API Gateway ARN endpoint: API Gateway endpoint URL @@ -221,7 +224,7 @@ def __init__(self, arn: str, endpoint: str) -> None: @property def arn(self) -> str: """Get the API Gateway ARN. - + Returns: str: API Gateway ARN """ @@ -230,7 +233,7 @@ def arn(self) -> str: @property def endpoint(self) -> str: """Get the API Gateway endpoint URL. - + Returns: str: API Gateway endpoint URL """ @@ -239,10 +242,10 @@ def endpoint(self) -> str: @staticmethod def deserialize(dct: dict) -> "AWSResources.HTTPApi": """Deserialize HTTP API from dictionary. - + Args: dct: Dictionary containing 'arn' and 'endpoint' - + Returns: AWSResources.HTTPApi: Deserialized HTTP API instance """ @@ -250,7 +253,7 @@ def deserialize(dct: dict) -> "AWSResources.HTTPApi": def serialize(self) -> dict: """Serialize HTTP API to dictionary. - + Returns: dict: Dictionary containing arn and endpoint """ @@ -264,7 +267,7 @@ def __init__( password: Optional[str] = None, ) -> None: """Initialize AWS resources. - + Args: registry: Docker registry URL (ECR repository URI) username: Docker registry username @@ -281,7 +284,7 @@ def __init__( @staticmethod def typename() -> str: """Get the type name for these resources. - + Returns: str: The type name 'AWS.Resources' """ @@ -290,7 +293,7 @@ def typename() -> str: @property def docker_registry(self) -> Optional[str]: """Get the Docker registry URL. - + Returns: Optional[str]: Docker registry URL (ECR repository URI) """ @@ -299,7 +302,7 @@ def docker_registry(self) -> Optional[str]: @property def docker_username(self) -> Optional[str]: """Get the Docker registry username. - + Returns: Optional[str]: Docker registry username """ @@ -308,7 +311,7 @@ def docker_username(self) -> Optional[str]: @property def docker_password(self) -> Optional[str]: """Get the Docker registry password. - + Returns: Optional[str]: Docker registry password """ @@ -317,7 +320,7 @@ def docker_password(self) -> Optional[str]: @property def container_repository(self) -> Optional[str]: """Get the ECR repository name. - + Returns: Optional[str]: ECR repository name """ @@ -325,17 +328,17 @@ def container_repository(self) -> Optional[str]: def lambda_role(self, boto3_session: boto3.session.Session) -> str: """Get or create IAM role for Lambda execution. - + Creates a Lambda execution role with S3 and basic execution permissions if it doesn't already exist. 
The role allows Lambda functions to access S3 and write CloudWatch logs. - + Args: boto3_session: Boto3 session for AWS API calls - + Returns: str: Lambda execution role ARN - + Raises: ClientError: If IAM operations fail """ @@ -380,18 +383,18 @@ def http_api( self, api_name: str, func: LambdaFunction, boto3_session: boto3.session.Session ) -> "AWSResources.HTTPApi": """Get or create HTTP API for Lambda function. - + Creates an HTTP API Gateway that routes requests to the specified Lambda function. If the API already exists, returns the cached instance. - + Args: api_name: Name of the HTTP API func: Lambda function to route requests to boto3_session: Boto3 session for AWS API calls - + Returns: AWSResources.HTTPApi: HTTP API configuration - + Raises: RuntimeError: If API creation fails after retries TooManyRequestsException: If API Gateway rate limits are exceeded @@ -452,14 +455,14 @@ def check_ecr_repository_exists( self, ecr_client: ECRClient, repository_name: str ) -> Optional[str]: """Check if ECR repository exists. - + Args: ecr_client: ECR client instance repository_name: Name of the ECR repository - + Returns: Optional[str]: Repository URI if exists, None otherwise - + Raises: Exception: If ECR operation fails (other than RepositoryNotFound) """ @@ -474,16 +477,16 @@ def check_ecr_repository_exists( def get_ecr_repository(self, ecr_client: ECRClient) -> str: """Get or create ECR repository for container deployments. - + Creates an ECR repository with a unique name based on the resource ID if it doesn't already exist. Updates the docker_registry property. - + Args: ecr_client: ECR client instance - + Returns: str: ECR repository name - + Raises: ClientError: If ECR operations fail """ @@ -514,16 +517,16 @@ def get_ecr_repository(self, ecr_client: ECRClient) -> str: def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, str]: """Get ECR repository authorization credentials. - + Retrieves temporary authorization token from ECR and extracts username and password for Docker registry authentication. - + Args: ecr_client: ECR client instance - + Returns: Tuple[str, str, str]: Username, password, and registry URL - + Raises: AssertionError: If username or registry are None ClientError: If ECR authorization fails @@ -542,13 +545,13 @@ def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, return self._docker_username, self._docker_password, self._docker_registry @staticmethod - def initialize(res: Resources, dct: dict) -> "AWSResources": + def initialize(res: Resources, dct: dict) -> None: """Initialize AWS resources from dictionary. - + Args: res: Base Resources instance to initialize dct: Dictionary containing resource configuration - + Returns: AWSResources: Initialized AWS resources instance """ @@ -566,11 +569,9 @@ def initialize(res: Resources, dct: dict) -> "AWSResources": for key, value in dct["http-apis"].items(): ret._http_apis[key] = AWSResources.HTTPApi.deserialize(value) - return ret - def serialize(self) -> dict: """Serialize AWS resources to dictionary. - + Returns: dict: Serialized resource configuration """ @@ -588,7 +589,7 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache) -> None: """Update cache with current resource configuration. 
- + Args: cache: Cache instance to update """ @@ -600,7 +601,8 @@ def update_cache(self, cache: Cache) -> None: val=self.docker_username, keys=["aws", "resources", "docker", "username"] ) cache.update_config( - val=self.container_repository, keys=["aws", "resources", "container_repository"] + val=self.container_repository, + keys=["aws", "resources", "container_repository"], ) cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): @@ -609,12 +611,12 @@ def update_cache(self, cache: Cache) -> None: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: """Deserialize AWS resources from configuration and cache. - + Args: config: Configuration dictionary cache: Cache instance for retrieving cached resources handlers: Logging handlers for status messages - + Returns: Resources: Deserialized AWSResources instance """ @@ -642,18 +644,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AWSConfig(Config): """Main AWS configuration container. - + Combines AWS credentials and resources into a single configuration object for use by the AWS SeBS implementation. - + Attributes: _credentials: AWS authentication credentials _resources: AWS resource management configuration """ - + def __init__(self, credentials: AWSCredentials, resources: AWSResources) -> None: """Initialize AWS configuration. - + Args: credentials: AWS authentication credentials resources: AWS resource management configuration @@ -665,7 +667,7 @@ def __init__(self, credentials: AWSCredentials, resources: AWSResources) -> None @staticmethod def typename() -> str: """Get the type name for this configuration. - + Returns: str: The type name 'AWS.Config' """ @@ -674,7 +676,7 @@ def typename() -> str: @property def credentials(self) -> AWSCredentials: """Get AWS credentials. - + Returns: AWSCredentials: AWS authentication credentials """ @@ -683,17 +685,16 @@ def credentials(self) -> AWSCredentials: @property def resources(self) -> AWSResources: """Get AWS resources configuration. - + Returns: AWSResources: AWS resource management configuration """ return self._resources - # FIXME: use future annotations (see sebs/faas/system) @staticmethod def initialize(cfg: Config, dct: dict) -> None: """Initialize AWS configuration from dictionary. - + Args: cfg: Base Config instance to initialize dct: Dictionary containing 'region' configuration @@ -704,20 +705,19 @@ def initialize(cfg: Config, dct: dict) -> None: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: """Deserialize AWS configuration from config and cache. - + Creates an AWSConfig instance by deserializing credentials and resources, then loading region configuration from cache or user-provided config. 
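# Sketch of the deserialization flow described above. `aws_section`, `cache`, and
# `handlers` are assumed to come from the SeBS client setup; the exact shape of the
# user configuration dictionary is not shown here.
from typing import cast

aws_config = cast(AWSConfig, AWSConfig.deserialize(aws_section, cache, handlers))
print(aws_config.credentials.account_id)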
- + Args: config: Configuration dictionary cache: Cache instance for retrieving cached configuration handlers: Logging handlers for status messages - + Returns: Config: Deserialized AWSConfig instance """ cached_config = cache.get_config("aws") - # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) config_obj = AWSConfig(credentials, resources) @@ -735,10 +735,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config def update_cache(self, cache: Cache) -> None: """Update the contents of the user cache. - + The changes are directly written to the file system. Updates region, credentials, and resources in the cache. - + Args: cache: Cache instance to update """ @@ -748,7 +748,7 @@ def update_cache(self, cache: Cache) -> None: def serialize(self) -> dict: """Serialize AWS configuration to dictionary. - + Returns: dict: Serialized configuration including name, region, credentials, and resources """ diff --git a/sebs/aws/container.py b/sebs/aws/container.py index 74d536a6..3cfa5d5b 100644 --- a/sebs/aws/container.py +++ b/sebs/aws/container.py @@ -23,20 +23,20 @@ class ECRContainer(DockerContainer): """AWS ECR container management for SeBS. - + This class handles Docker container operations specifically for AWS Lambda deployments using Amazon Elastic Container Registry (ECR). It provides functionality for building, tagging, and pushing container images to ECR. - + Attributes: ecr_client: AWS ECR client for registry operations config: AWS-specific configuration """ - + @staticmethod def name() -> str: """Get the name of this container system. - + Returns: str: System name ('aws') """ @@ -45,7 +45,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of this container system. - + Returns: str: Type name ('AWS.ECRContainer') """ @@ -59,7 +59,7 @@ def __init__( docker_client: docker.client.DockerClient, ) -> None: """Initialize ECR container manager. - + Args: system_config: SeBS system configuration session: AWS boto3 session @@ -73,7 +73,7 @@ def __init__( @property def client(self) -> ECRClient: """Get the ECR client. - + Returns: ECRClient: AWS ECR client for registry operations """ @@ -83,16 +83,16 @@ def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: """Generate ECR registry details for a benchmark image. - + Creates the registry name, repository name, image tag, and full image URI for a specific benchmark configuration. - + Args: benchmark: Name of the benchmark language_name: Programming language (e.g., 'python', 'nodejs') language_version: Language version (e.g., '3.8', '14') architecture: Target architecture (e.g., 'x64', 'arm64') - + Returns: Tuple[str, str, str, str]: Registry name, repository name, image tag, and image URI """ @@ -110,11 +110,11 @@ def registry_name( def find_image(self, repository_name: str, image_tag: str) -> bool: """Check if an image exists in the ECR repository. - + Args: repository_name: Name of the ECR repository image_tag: Tag of the image to search for - + Returns: bool: True if the image exists, False otherwise """ @@ -131,14 +131,14 @@ def find_image(self, repository_name: str, image_tag: str) -> bool: def push_image(self, repository_uri: str, image_tag: str) -> None: """Push a Docker image to ECR. 
- + Authenticates with ECR using temporary credentials and pushes the specified image to the repository. - + Args: repository_uri: URI of the ECR repository image_tag: Tag of the image to push - + Raises: RuntimeError: If the push operation fails """ diff --git a/sebs/aws/dynamodb.py b/sebs/aws/dynamodb.py index c0d9aff0..bbea3e73 100644 --- a/sebs/aws/dynamodb.py +++ b/sebs/aws/dynamodb.py @@ -21,21 +21,21 @@ class DynamoDB(NoSQLStorage): """AWS DynamoDB NoSQL storage implementation for SeBS. - + This class provides NoSQL storage functionality using Amazon DynamoDB. It handles table creation, data operations, caching, and provides a unified interface for benchmark data storage. - + Attributes: client: DynamoDB client for AWS API operations _tables: Mapping of benchmark names to table configurations _serializer: DynamoDB type serializer for data conversion """ - + @staticmethod def typename() -> str: """Get the type name for this storage system. - + Returns: str: Type name ('AWS.DynamoDB') """ @@ -44,7 +44,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment name for this storage system. - + Returns: str: Deployment name ('aws') """ @@ -60,7 +60,7 @@ def __init__( secret_key: str, ) -> None: """Initialize DynamoDB NoSQL storage. - + Args: session: AWS boto3 session cache_client: Cache client for storing table configurations @@ -85,10 +85,10 @@ def __init__( def retrieve_cache(self, benchmark: str) -> bool: """Retrieve table configuration from cache. - + Args: benchmark: Name of the benchmark - + Returns: bool: True if cache was found and loaded, False otherwise """ @@ -104,7 +104,7 @@ def retrieve_cache(self, benchmark: str) -> bool: def update_cache(self, benchmark: str) -> None: """Update cache with current table configuration. - + Args: benchmark: Name of the benchmark to update cache for """ @@ -118,10 +118,10 @@ def update_cache(self, benchmark: str) -> None: def get_tables(self, benchmark: str) -> Dict[str, str]: """Get table mappings for a benchmark. - + Args: benchmark: Name of the benchmark - + Returns: Dict[str, str]: Mapping of logical table names to actual DynamoDB table names """ @@ -129,11 +129,11 @@ def get_tables(self, benchmark: str) -> Dict[str, str]: def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: """Get the actual DynamoDB table name for a logical table. - + Args: benchmark: Name of the benchmark table: Logical table name used by the benchmark - + Returns: Optional[str]: Actual DynamoDB table name, or None if not found """ @@ -154,14 +154,14 @@ def write_to_table( secondary_key: Optional[Tuple[str, str]] = None, ) -> None: """Write data to a DynamoDB table. - + Args: benchmark: Name of the benchmark table: Logical table name data: Data to write to the table primary_key: Primary key as (attribute_name, value) tuple secondary_key: Optional secondary key as (attribute_name, value) tuple - + Raises: AssertionError: If the table name is not found """ @@ -179,20 +179,20 @@ def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: """Create a DynamoDB table for benchmark data. - + Creates a DynamoDB table with a unique name for the benchmark. Unlike Azure (account -> database -> container) and GCP (database per benchmark), AWS requires unique table names across the account. 
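# Sketch: creating a benchmark table and writing one item through the DynamoDB
# wrapper. The benchmark, table, and key names are illustrative; `nosql` is assumed
# to be a DynamoDB instance obtained from the system resources.
table_name = nosql.create_table("110.dynamic-html", "users", primary_key="user_id")
nosql.write_to_table(
    "110.dynamic-html",
    "users",
    data={"name": "alice"},
    primary_key=("user_id", "user-001"),
)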
- + Args: benchmark: Name of the benchmark name: Logical table name primary_key: Name of the primary key attribute secondary_key: Optional name of the secondary key attribute - + Returns: str: Name of the created table - + Raises: RuntimeError: If table creation fails for unknown reasons """ @@ -261,13 +261,13 @@ def create_table( def clear_table(self, name: str) -> str: """Clear all data from a table. - + Args: name: Name of the table to clear - + Returns: str: Result of the operation - + Raises: NotImplementedError: This operation is not yet implemented """ @@ -275,13 +275,13 @@ def clear_table(self, name: str) -> str: def remove_table(self, name: str) -> str: """Remove a table completely. - + Args: name: Name of the table to remove - + Returns: str: Result of the operation - + Raises: NotImplementedError: This operation is not yet implemented """ diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 58ebd69e..10aac845 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -16,18 +16,18 @@ class LambdaFunction(Function): """ AWS Lambda function implementation for the SeBs framework. - + This class represents an AWS Lambda function in the serverless benchmarking suite. It extends the base Function class with AWS-specific attributes and functionality. - + Attributes: arn: Amazon Resource Name of the Lambda function role: IAM role ARN used by the function runtime: Runtime environment for the function (e.g., 'python3.8') bucket: S3 bucket name where the function code is stored """ - + def __init__( self, name: str, @@ -41,7 +41,7 @@ def __init__( ): """ Initialize an AWS Lambda function. - + Args: name: Name of the function benchmark: Name of the benchmark @@ -62,7 +62,7 @@ def __init__( def typename() -> str: """ Get the type name of this class. - + Returns: str: The type name """ @@ -71,7 +71,7 @@ def typename() -> str: def serialize(self) -> dict: """ Serialize the Lambda function to a dictionary. - + Returns: dict: Dictionary representation of the Lambda function """ @@ -87,13 +87,13 @@ def serialize(self) -> dict: def deserialize(cached_config: dict) -> "LambdaFunction": """ Create a LambdaFunction instance from a cached configuration. - + Args: cached_config: Dictionary containing the cached function configuration - + Returns: LambdaFunction: A new instance with the deserialized data - + Raises: AssertionError: If an unknown trigger type is encountered """ @@ -123,11 +123,11 @@ def deserialize(cached_config: dict) -> "LambdaFunction": def code_bucket(self, benchmark: str, storage_client: S3) -> str: """ Get the S3 bucket for the function code. - + Args: benchmark: Name of the benchmark storage_client: S3 storage client - + Returns: str: Name of the S3 bucket """ diff --git a/sebs/aws/resources.py b/sebs/aws/resources.py index f8048d50..d29d8f75 100644 --- a/sebs/aws/resources.py +++ b/sebs/aws/resources.py @@ -25,22 +25,22 @@ class AWSSystemResources(SystemResources): """AWS system resources manager for SeBS. - + This class manages AWS-specific resources including S3 storage and DynamoDB NoSQL storage. It provides a unified interface for accessing AWS services with proper session management and caching. - + Attributes: _session: AWS boto3 session for API calls _logging_handlers: Logging configuration handlers _storage: S3 storage client instance _nosql_storage: DynamoDB NoSQL storage client instance """ - + @staticmethod def typename() -> str: """Get the type name for these resources. 
- + Returns: str: The type name 'AWS.SystemResources' """ @@ -49,7 +49,7 @@ def typename() -> str: @property def config(self) -> AWSConfig: """Get the AWS configuration. - + Returns: AWSConfig: AWS-specific configuration """ @@ -63,7 +63,7 @@ def __init__( logger_handlers: LoggingHandlers, ) -> None: """Initialize AWS system resources. - + Args: config: AWS-specific configuration cache_client: Cache client for resource caching @@ -79,7 +79,7 @@ def __init__( def initialize_session(self, session: boto3.session.Session) -> None: """Initialize the AWS boto3 session. - + Args: session: Boto3 session to use for AWS API calls """ @@ -87,16 +87,16 @@ def initialize_session(self, session: boto3.session.Session) -> None: def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: """Get or create S3 storage client. - + Creates a client instance for S3 cloud storage. Storage is initialized with required buckets that may be created or retrieved from cache. - + Args: replace_existing: Whether to replace existing files in cached buckets - + Returns: PersistentStorage: S3 storage client instance - + Raises: AssertionError: If session has not been initialized """ @@ -120,13 +120,13 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor def get_nosql_storage(self) -> NoSQLStorage: """Get or create DynamoDB NoSQL storage client. - + Creates a client instance for DynamoDB NoSQL storage. The client is configured with AWS credentials and region from the system config. - + Returns: NoSQLStorage: DynamoDB NoSQL storage client instance - + Raises: AssertionError: If session has not been initialized """ diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 82ebe836..bc0d5b61 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -21,20 +21,20 @@ class S3(PersistentStorage): """AWS S3 persistent storage implementation for SeBS. - + This class provides persistent storage functionality using Amazon S3. It handles bucket creation, file operations, and provides a unified interface for benchmark data storage and retrieval. - + Attributes: client: S3 client for AWS API operations cached: Whether bucket configurations are cached """ - + @staticmethod def typename() -> str: """Get the type name for this storage system. - + Returns: str: Type name ('AWS.S3') """ @@ -43,7 +43,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment name for this storage system. - + Returns: str: Deployment name ('aws') """ @@ -52,7 +52,7 @@ def deployment_name() -> str: @property def replace_existing(self) -> bool: """Get whether to replace existing files. - + Returns: bool: True if existing files should be replaced, False otherwise """ @@ -61,7 +61,7 @@ def replace_existing(self) -> bool: @replace_existing.setter def replace_existing(self, val: bool) -> None: """Set whether to replace existing files. - + Args: val: True to replace existing files, False otherwise """ @@ -78,7 +78,7 @@ def __init__( replace_existing: bool, ) -> None: """Initialize S3 persistent storage. - + Args: session: AWS boto3 session cache_client: Cache client for storing bucket configurations @@ -99,10 +99,10 @@ def __init__( def correct_name(self, name: str) -> str: """Correct bucket name for S3 naming requirements. - + Args: name: Original bucket name - + Returns: str: Corrected bucket name (no changes for S3) """ @@ -112,18 +112,18 @@ def _create_bucket( self, name: str, buckets: List[str] = [], randomize_name: bool = False ) -> str: """Create an S3 bucket with the specified name. 
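# Sketch: acquiring storage clients from AWSSystemResources. `system_resources`
# and `session` are assumed to come from the AWS system initialization; the session
# must be set before either storage client is requested.
system_resources.initialize_session(session)
storage = system_resources.get_storage(replace_existing=False)
nosql = system_resources.get_nosql_storage()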
- + Handles the complex S3 bucket creation logic including region-specific requirements and conflict resolution. - + Args: name: Desired bucket name buckets: List of existing buckets to check against randomize_name: Whether to append a random suffix to ensure uniqueness - + Returns: str: Name of the created bucket - + Raises: BucketAlreadyExists: If bucket already exists in the same region ClientError: If bucket creation fails for other reasons @@ -178,10 +178,10 @@ def _create_bucket( def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: """Upload a file to S3 with caching and replacement logic. - + Handles the upload of benchmark files with appropriate caching behavior and replacement logic based on configuration. - + Args: path_idx: Index of the input path configuration key: S3 object key for the file @@ -205,7 +205,7 @@ def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: def upload(self, bucket_name: str, filepath: str, key: str) -> None: """Upload a file to S3. - + Args: bucket_name: Name of the S3 bucket filepath: Local path to the file to upload @@ -216,7 +216,7 @@ def upload(self, bucket_name: str, filepath: str, key: str) -> None: def download(self, bucket_name: str, key: str, filepath: str) -> None: """Download a file from S3. - + Args: bucket_name: Name of the S3 bucket key: S3 object key of the file to download @@ -227,10 +227,10 @@ def download(self, bucket_name: str, key: str, filepath: str) -> None: def exists_bucket(self, bucket_name: str) -> bool: """Check if an S3 bucket exists and is accessible. - + Args: bucket_name: Name of the bucket to check - + Returns: bool: True if bucket exists and is accessible, False otherwise """ @@ -242,11 +242,11 @@ def exists_bucket(self, bucket_name: str) -> bool: def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: """List objects in an S3 bucket with optional prefix filtering. - + Args: bucket_name: Name of the S3 bucket prefix: Optional prefix to filter objects - + Returns: List[str]: List of object keys in the bucket """ @@ -260,10 +260,10 @@ def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: """List S3 buckets with optional name filtering. - + Args: bucket_name: Optional bucket name pattern to filter by - + Returns: List[str]: List of bucket names """ @@ -275,7 +275,7 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def clean_bucket(self, bucket: str) -> None: """Remove all objects from an S3 bucket. - + Args: bucket: Name of the bucket to clean """ @@ -286,10 +286,10 @@ def clean_bucket(self, bucket: str) -> None: def remove_bucket(self, bucket: str) -> None: """Delete an S3 bucket. - + Args: bucket: Name of the bucket to delete - + Note: The bucket must be empty before it can be deleted """ diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 75ebbcd0..f867d749 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -21,19 +21,19 @@ class LibraryTrigger(Trigger): """AWS Lambda library trigger for direct SDK invocation. - + This trigger uses the AWS Lambda SDK to directly invoke Lambda functions. It provides both synchronous and asynchronous invocation methods with comprehensive result parsing and error handling. 
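# Sketch: invoking a deployed function directly through the Lambda SDK trigger.
# The function name is a placeholder and `aws_client` is assumed to be the AWS
# deployment client created by SeBS.
trigger = LibraryTrigger("sebs-110-dynamic-html", deployment_client=aws_client)
result = trigger.sync_invoke({"size": "test"})  # ExecutionResult with parsed metrics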
- + Attributes: name: Name of the Lambda function _deployment_client: AWS deployment client for Lambda operations """ - + def __init__(self, fname: str, deployment_client: Optional[AWS] = None) -> None: """Initialize the library trigger. - + Args: fname: Name of the Lambda function deployment_client: AWS deployment client (can be set later) @@ -45,7 +45,7 @@ def __init__(self, fname: str, deployment_client: Optional[AWS] = None) -> None: @staticmethod def typename() -> str: """Get the type name for this trigger. - + Returns: str: Type name ('AWS.LibraryTrigger') """ @@ -54,10 +54,10 @@ def typename() -> str: @property def deployment_client(self) -> AWS: """Get the AWS deployment client. - + Returns: AWS: AWS deployment client - + Raises: AssertionError: If deployment client is not set """ @@ -67,7 +67,7 @@ def deployment_client(self) -> AWS: @deployment_client.setter def deployment_client(self, deployment_client: AWS) -> None: """Set the AWS deployment client. - + Args: deployment_client: AWS deployment client to set """ @@ -76,7 +76,7 @@ def deployment_client(self, deployment_client: AWS) -> None: @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type. - + Returns: Trigger.TriggerType: LIBRARY trigger type """ @@ -84,13 +84,13 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: dict) -> ExecutionResult: """Synchronously invoke the Lambda function. - + Invokes the Lambda function with the provided payload and waits for the result. Parses AWS-specific metrics and benchmark output. - + Args: payload: Dictionary payload to send to the function - + Returns: ExecutionResult: Result of the function execution including metrics """ @@ -130,16 +130,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict) -> dict: """Asynchronously invoke the Lambda function. - + Triggers the Lambda function asynchronously without waiting for the result. Used for fire-and-forget invocations. - + Args: payload: Dictionary payload to send to the function - + Returns: dict: AWS Lambda invocation response - + Raises: RuntimeError: If the async invocation fails """ @@ -161,7 +161,7 @@ def async_invoke(self, payload: dict) -> dict: def serialize(self) -> dict: """Serialize the trigger to a dictionary. - + Returns: dict: Serialized trigger configuration """ @@ -170,10 +170,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: """Deserialize a trigger from a dictionary. - + Args: obj: Dictionary containing trigger configuration - + Returns: Trigger: Deserialized LibraryTrigger instance """ @@ -182,19 +182,19 @@ def deserialize(obj: dict) -> Trigger: class HTTPTrigger(Trigger): """AWS API Gateway HTTP trigger for Lambda functions. - + This trigger uses HTTP requests to invoke Lambda functions through AWS API Gateway. It provides both synchronous and asynchronous invocation methods. - + Attributes: url: API Gateway endpoint URL api_id: API Gateway API ID """ - + def __init__(self, url: str, api_id: str) -> None: """Initialize the HTTP trigger. - + Args: url: API Gateway endpoint URL api_id: API Gateway API ID @@ -206,7 +206,7 @@ def __init__(self, url: str, api_id: str) -> None: @staticmethod def typename() -> str: """Get the type name for this trigger. - + Returns: str: Type name ('AWS.HTTPTrigger') """ @@ -215,7 +215,7 @@ def typename() -> str: @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type. 
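# Sketch: the HTTP counterpart invoked through API Gateway. The endpoint URL and
# API id are placeholders matching the HTTPApi resource described earlier.
http_trigger = HTTPTrigger(
    url="https://abc123.execute-api.us-east-1.amazonaws.com", api_id="abc123"
)
http_result = http_trigger.sync_invoke({"size": "test"})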
- + Returns: Trigger.TriggerType: HTTP trigger type """ @@ -223,13 +223,13 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: dict) -> ExecutionResult: """Synchronously invoke the function via HTTP. - + Sends an HTTP request to the API Gateway endpoint and waits for the response. - + Args: payload: Dictionary payload to send to the function - + Returns: ExecutionResult: Result of the HTTP invocation """ @@ -238,12 +238,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict) -> concurrent.futures.Future: """Asynchronously invoke the function via HTTP. - + Submits the HTTP invocation to a thread pool for asynchronous execution. - + Args: payload: Dictionary payload to send to the function - + Returns: concurrent.futures.Future: Future object for the async invocation """ @@ -253,7 +253,7 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: """Serialize the trigger to a dictionary. - + Returns: dict: Serialized trigger configuration """ @@ -262,10 +262,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: """Deserialize a trigger from a dictionary. - + Args: obj: Dictionary containing trigger configuration - + Returns: Trigger: Deserialized HTTPTrigger instance """ diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 2208c2ce..e88cf9db 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -62,11 +62,11 @@ class Azure(System): """Azure serverless platform implementation. - + This class implements the Azure-specific functionality for the SeBS benchmarking suite. It handles Azure Functions deployment, resource management, and benchmark execution on Microsoft Azure platform. - + Attributes: logs_client: Azure logs client (currently unused) storage: BlobStorage instance for Azure Blob Storage operations @@ -74,7 +74,7 @@ class Azure(System): _config: Azure configuration containing credentials and resources AZURE_RUNTIMES: Mapping of language names to Azure runtime identifiers """ - + logs_client = None storage: BlobStorage cached: bool = False @@ -86,7 +86,7 @@ class Azure(System): @staticmethod def name() -> str: """Get the platform name. - + Returns: Platform name 'azure'. """ @@ -95,7 +95,7 @@ def name() -> str: @property def config(self) -> AzureConfig: """Get Azure configuration. - + Returns: Azure configuration containing credentials and resources. """ @@ -104,7 +104,7 @@ def config(self) -> AzureConfig: @staticmethod def function_type() -> Type[Function]: """Get the function type for Azure. - + Returns: AzureFunction class type. """ @@ -113,7 +113,7 @@ def function_type() -> Type[Function]: @property def cli_instance(self) -> AzureCLI: """Get Azure CLI instance. - + Returns: Azure CLI instance for executing Azure commands. """ @@ -128,7 +128,7 @@ def __init__( logger_handlers: LoggingHandlers, ) -> None: """Initialize Azure system. - + Args: sebs_config: SeBS configuration settings config: Azure-specific configuration @@ -151,10 +151,10 @@ def initialize( resource_prefix: Optional[str] = None, ) -> None: """Initialize Azure system and start CLI container. - + Initializes Azure resources and allocates shared resources like data storage account. Starts the Docker container with Azure CLI tools. - + Args: config: Additional configuration parameters resource_prefix: Optional prefix for resource naming @@ -164,7 +164,7 @@ def initialize( def shutdown(self) -> None: """Shutdown Azure system and cleanup resources. 
- + Stops the Azure CLI container and performs cleanup of system resources. """ cast(AzureSystemResources, self._system_resources).shutdown() @@ -172,10 +172,10 @@ def shutdown(self) -> None: def find_deployments(self) -> List[str]: """Find existing SeBS deployments by scanning resource groups. - + Looks for Azure resource groups matching the SeBS naming pattern to identify existing deployments that can be reused. - + Returns: List of deployment identifiers found in resource groups. """ @@ -191,7 +191,7 @@ def find_deployments(self) -> List[str]: def allocate_shared_resource(self) -> None: """Allocate shared data storage account. - + Creates or retrieves the shared data storage account used for benchmark input/output data. This allows multiple deployment clients to share the same storage, simplifying regression testing. @@ -209,14 +209,14 @@ def package_code( container_deployment: bool, ) -> Tuple[str, int, str]: """Package function code for Azure Functions deployment. - + Creates the proper directory structure and configuration files required for Azure Functions deployment. The structure includes: - handler/ directory with source files and Azure wrappers - function.json with trigger and binding configuration - host.json with runtime configuration - requirements.txt or package.json with dependencies - + Args: directory: Directory containing the function code language_name: Programming language (python, nodejs) @@ -225,10 +225,10 @@ def package_code( benchmark: Name of the benchmark is_cached: Whether the package is from cache container_deployment: Whether to use container deployment - + Returns: Tuple of (directory_path, code_size_bytes, container_uri) - + Raises: NotImplementedError: If container deployment is requested. """ @@ -295,19 +295,19 @@ def publish_function( repeat_on_failure: bool = False, ) -> str: """Publish function code to Azure Functions. - + Deploys the packaged function code to Azure Functions using the Azure Functions CLI tools. Handles retries and URL extraction. - + Args: function: Function instance to publish code_package: Benchmark code package to deploy container_dest: Destination path in the CLI container repeat_on_failure: Whether to retry on failure - + Returns: URL for invoking the published function. - + Raises: RuntimeError: If function publication fails or URL cannot be found. """ @@ -377,16 +377,16 @@ def update_function( container_uri: str, ) -> None: """Update existing Azure Function with new code. - + Updates an existing Azure Function with new code package, including environment variables and function configuration. - + Args: function: Function instance to update code_package: New benchmark code package container_deployment: Whether using container deployment container_uri: Container URI (unused for Azure) - + Raises: NotImplementedError: If container deployment is requested. """ @@ -420,18 +420,20 @@ def update_function( trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) - def update_envs(self, function: Function, code_package: Benchmark, env_variables: dict = {}) -> None: + def update_envs( + self, function: Function, code_package: Benchmark, env_variables: dict = {} + ) -> None: """Update environment variables for Azure Function. - + Sets up environment variables required for benchmark execution, including storage connection strings and NoSQL database credentials. Preserves existing environment variables while adding new ones. 
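# Sketch: passing extra environment variables during a function update. The
# variable name below is illustrative; `azure_system`, `function`, and
# `code_package` are assumed to exist from earlier deployment steps.
azure_system.update_envs(function, code_package, env_variables={"FORCE_COLD_START": "1"})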
- + Args: function: Function instance to update code_package: Benchmark code package with requirements env_variables: Additional environment variables to set - + Raises: RuntimeError: If environment variable operations fail. """ @@ -520,10 +522,10 @@ def update_envs(self, function: Function, code_package: Benchmark, env_variables def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: """Update Azure Function configuration. - + Currently not implemented for Azure Functions as memory and timeout configuration is handled at the consumption plan level. - + Args: function: Function instance to configure code_package: Benchmark code package with requirements @@ -535,13 +537,13 @@ def update_function_configuration(self, function: Function, code_package: Benchm def _mount_function_code(self, code_package: Benchmark) -> str: """Mount function code package in Azure CLI container. - + Uploads the function code package to a temporary location in the Azure CLI container for deployment operations. - + Args: code_package: Benchmark code package to mount - + Returns: Path to mounted code in the CLI container. """ @@ -553,15 +555,15 @@ def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: """Generate default function name for Azure. - + Creates a globally unique function name based on resource ID, benchmark name, language, and version. Function app names must be globally unique across all of Azure. - + Args: code_package: Benchmark code package resources: Optional resources (unused) - + Returns: Globally unique function name for Azure. """ @@ -585,20 +587,20 @@ def create_function( container_uri: str, ) -> AzureFunction: """Create new Azure Function. - + Creates a new Azure Function App and deploys the provided code package. Handles function app creation, storage account allocation, and initial deployment with proper configuration. - + Args: code_package: Benchmark code package to deploy func_name: Name for the Azure Function App container_deployment: Whether to use container deployment container_uri: Container URI (unused for Azure) - + Returns: AzureFunction instance representing the created function. - + Raises: NotImplementedError: If container deployment is requested. RuntimeError: If function creation fails. @@ -688,10 +690,10 @@ def create_function( def cached_function(self, function: Function) -> None: """Initialize cached function with current configuration. - + Sets up a cached function with current data storage account and logging handlers for all triggers. - + Args: function: Function instance loaded from cache """ @@ -710,11 +712,11 @@ def download_metrics( metrics: Dict[str, dict], ) -> None: """Download execution metrics from Azure Application Insights. - + Retrieves performance metrics for function executions from Azure Application Insights and updates the execution results with provider-specific timing information. - + Args: function_name: Name of the Azure Function start_time: Start timestamp for metrics collection @@ -796,9 +798,9 @@ def download_metrics( def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: """Enforce cold start for a single function. - + Updates environment variable to force cold start behavior. 
- + Args: function: Function instance to update code_package: Benchmark code package @@ -810,10 +812,10 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> No def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: """Enforce cold start for multiple functions. - + Forces cold start behavior for all provided functions by updating environment variables and waiting for changes to propagate. - + Args: functions: List of functions to enforce cold start for code_package: Benchmark code package @@ -827,14 +829,14 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: """Create trigger for Azure Function. - + Currently not implemented as HTTP triggers are automatically created for each function during deployment. - + Args: function: Function to create trigger for trigger_type: Type of trigger to create - + Raises: NotImplementedError: Trigger creation is not supported. """ diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index 70842ff1..90211181 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -38,20 +38,20 @@ class BlobStorage(PersistentStorage): """Azure Blob Storage implementation for benchmark data management. - + This class provides Azure Blob Storage operations for storing and retrieving benchmark input data, function outputs, and temporary files. It manages containers (equivalent to S3 buckets) and handles file operations with proper error handling and logging. - + Attributes: client: Azure Blob Service client for storage operations """ - + @staticmethod def typename() -> str: """Get the storage type name. - + Returns: Storage type identifier for Azure Blob Storage. """ @@ -60,7 +60,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment platform name. - + Returns: Platform name 'azure'. """ @@ -75,7 +75,7 @@ def __init__( replace_existing: bool, ) -> None: """Initialize Azure Blob Storage. - + Args: region: Azure region for storage operations cache_client: Cache for storing storage configuration @@ -90,15 +90,15 @@ def _create_bucket( self, name: str, containers: List[str] = [], randomize_name: bool = False ) -> str: """Create new Azure Blob Storage container. - + Internal implementation for creating containers with optional name randomization and existence checking. - + Args: name: Base name for the container containers: List of existing containers to check randomize_name: Whether to append random suffix to name - + Returns: Name of the created or existing container. """ @@ -115,13 +115,13 @@ def _create_bucket( def correct_name(self, name: str) -> str: """Correct container name for Azure requirements. - + Azure Blob Storage does not allow dots in container names, so they are replaced with hyphens. - + Args: name: Original container name - + Returns: Corrected container name with dots replaced by hyphens. """ @@ -129,12 +129,12 @@ def correct_name(self, name: str) -> str: def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: """List Azure Blob Storage containers. - + Lists all containers or those matching a prefix. - + Args: bucket_name: Optional prefix to filter container names - + Returns: List of container names. 
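# Sketch: Azure container names may not contain dots, so benchmark names are
# normalized before use; `blob_storage` is assumed to be a BlobStorage instance.
container = blob_storage.correct_name("sebs.benchmarks.input")  # "sebs-benchmarks-input"
existing = blob_storage.list_buckets(bucket_name="sebs-benchmarks")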
""" @@ -148,10 +148,10 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def uploader_func(self, container_idx: int, file: str, filepath: str) -> None: """Upload file to Azure Blob Storage container. - + Uploads a file to the specified container with proper path handling and duplicate checking. - + Args: container_idx: Index of the container for file organization file: Name of the file being uploaded @@ -177,9 +177,9 @@ def uploader_func(self, container_idx: int, file: str, filepath: str) -> None: def download(self, container_name: str, key: str, filepath: str) -> None: """Download file from Azure Blob Storage. - + Downloads a blob from the specified container to a local file. - + Args: container_name: Name of the Azure Blob Storage container key: Blob key/name in the container @@ -192,9 +192,9 @@ def download(self, container_name: str, key: str, filepath: str) -> None: def upload(self, container_name: str, filepath: str, key: str) -> None: """Upload file to Azure Blob Storage. - + Uploads a local file to the specified container with the given key. - + Args: container_name: Name of the Azure Blob Storage container filepath: Local file path to upload @@ -207,10 +207,10 @@ def upload(self, container_name: str, filepath: str, key: str) -> None: def exists_bucket(self, container: str) -> bool: """Check if Azure Blob Storage container exists. - + Args: container: Name of the container to check - + Returns: True if container exists, False otherwise. """ @@ -218,14 +218,14 @@ def exists_bucket(self, container: str) -> bool: def list_bucket(self, container: str, prefix: str = "") -> List[str]: """List files in Azure Blob Storage container. - + Returns list of blob names in the specified container, optionally filtered by prefix. - + Args: container: Name of the container to list prefix: Optional prefix to filter blob names - + Returns: List of blob names. Empty list if container is empty. """ @@ -239,10 +239,10 @@ def list_bucket(self, container: str, prefix: str = "") -> List[str]: def clean_bucket(self, bucket: str) -> None: """Clean all blobs from Azure Blob Storage container. - + Removes all blobs from the specified container but keeps the container itself. - + Args: bucket: Name of the container to clean """ @@ -254,9 +254,9 @@ def clean_bucket(self, bucket: str) -> None: def remove_bucket(self, bucket: str) -> None: """Remove Azure Blob Storage container. - + Deletes the entire container and all its contents. - + Args: bucket: Name of the container to remove """ diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index caaa3102..4995c282 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -42,26 +42,26 @@ class AzureCLI(LoggingBase): """Azure CLI Docker container wrapper. - + This class manages a Docker container running Azure CLI tools and provides methods for executing Azure commands, handling authentication, and managing file transfers for serverless function deployment. - + Attributes: docker_instance: Docker container running Azure CLI _insights_installed: Flag indicating if Application Insights extension is installed """ - + def __init__(self, system_config: SeBSConfig, docker_client: docker.client) -> None: """Initialize Azure CLI container. - + Creates and starts a Docker container with Azure CLI tools installed. Handles image pulling if not available locally. - + Args: system_config: SeBS system configuration docker_client: Docker client for container operations - + Raises: RuntimeError: If Docker image pull fails. 
""" @@ -106,7 +106,7 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client) -> N @staticmethod def typename() -> str: """Get the CLI type name. - + Returns: Type identifier for Azure CLI. """ @@ -114,16 +114,16 @@ def typename() -> str: def execute(self, cmd: str) -> bytes: """Execute Azure CLI command in Docker container. - + Executes the given command in the Azure CLI container and returns the output. Raises an exception if the command fails. - + Args: cmd: Azure CLI command to execute - + Returns: Command output as bytes. - + Raises: RuntimeError: If command execution fails. """ @@ -138,15 +138,15 @@ def execute(self, cmd: str) -> bytes: def login(self, appId: str, tenant: str, password: str) -> bytes: """Login to Azure using service principal credentials. - + Authenticates with Azure using service principal credentials within the Docker container. - + Args: appId: Azure application (client) ID tenant: Azure tenant (directory) ID password: Azure client secret - + Returns: Login command output as bytes. """ @@ -162,15 +162,15 @@ def login(self, appId: str, tenant: str, password: str) -> bytes: def upload_package(self, directory: str, dest: str) -> None: """Upload function package to Docker container. - + Creates a compressed archive of the function package and uploads it to the specified destination in the Docker container. - + Note: This implementation loads the entire archive into memory, which may not be efficient for very large function packages. For large packages, consider using docker cp directly. - + Args: directory: Local directory containing function package dest: Destination path in the Docker container @@ -186,7 +186,7 @@ def upload_package(self, directory: str, dest: str) -> None: def install_insights(self) -> None: """Install Azure Application Insights CLI extension. - + Installs the Application Insights extension for Azure CLI if not already installed. Required for metrics collection. """ @@ -196,7 +196,7 @@ def install_insights(self) -> None: def shutdown(self) -> None: """Shutdown Azure CLI Docker container. - + Stops and removes the Docker container running Azure CLI tools. """ self.logging.info("Stopping Azure manage Docker instance") diff --git a/sebs/azure/cloud_resources.py b/sebs/azure/cloud_resources.py index e0d2a1dd..51cc80e2 100644 --- a/sebs/azure/cloud_resources.py +++ b/sebs/azure/cloud_resources.py @@ -60,8 +60,8 @@ def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> ret = cli_instance.execute( f" az cosmosdb show --name {account_name} " f" --resource-group {resource_group} " ) - ret = json.loads(ret.decode("utf-8")) - return ret["documentEndpoint"] + ret_dct = json.loads(ret.decode("utf-8")) + return ret_dct["documentEndpoint"] @staticmethod def query_credentials(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: @@ -70,8 +70,8 @@ def query_credentials(account_name: str, resource_group: str, cli_instance: Azur ret = cli_instance.execute( f" az cosmosdb keys list --name {account_name} " f" --resource-group {resource_group} " ) - ret = json.loads(ret.decode("utf-8")) - credential = ret["primaryMasterKey"] + ret_dct = json.loads(ret.decode("utf-8")) + credential = ret_dct["primaryMasterKey"] return credential diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 32da2aff..0229a5de 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -40,14 +40,14 @@ class AzureCredentials(Credentials): """Azure service principal credentials for authentication. 
- + This class manages Azure service principal credentials required for authenticating with Azure services. It handles app ID, tenant ID, password, and subscription ID validation and caching. - + Attributes: _appId: Azure application (client) ID - _tenant: Azure tenant (directory) ID + _tenant: Azure tenant (directory) ID _password: Azure client secret _subscription_id: Azure subscription ID (optional) """ @@ -61,7 +61,7 @@ def __init__( self, appId: str, tenant: str, password: str, subscription_id: Optional[str] = None ) -> None: """Initialize Azure credentials. - + Args: appId: Azure application (client) ID tenant: Azure tenant (directory) ID @@ -77,7 +77,7 @@ def __init__( @property def appId(self) -> str: """Get the Azure application (client) ID. - + Returns: Azure application ID string. """ @@ -86,7 +86,7 @@ def appId(self) -> str: @property def tenant(self) -> str: """Get the Azure tenant (directory) ID. - + Returns: Azure tenant ID string. """ @@ -95,7 +95,7 @@ def tenant(self) -> str: @property def password(self) -> str: """Get the Azure client secret. - + Returns: Azure client secret string. """ @@ -104,10 +104,10 @@ def password(self) -> str: @property def subscription_id(self) -> str: """Get the Azure subscription ID. - + Returns: Azure subscription ID string. - + Raises: AssertionError: If subscription ID is not set. """ @@ -117,10 +117,10 @@ def subscription_id(self) -> str: @subscription_id.setter def subscription_id(self, subscription_id: str) -> None: """Set the Azure subscription ID with validation. - + Args: subscription_id: Azure subscription ID to set - + Raises: RuntimeError: If provided subscription ID conflicts with cached value. """ @@ -141,7 +141,7 @@ def subscription_id(self, subscription_id: str) -> None: @property def has_subscription_id(self) -> bool: """Check if subscription ID is set. - + Returns: True if subscription ID is set, False otherwise. """ @@ -150,11 +150,11 @@ def has_subscription_id(self) -> bool: @staticmethod def initialize(dct: dict, subscription_id: Optional[str]) -> "AzureCredentials": """Initialize credentials from dictionary. - + Args: dct: Dictionary containing credential information subscription_id: Optional subscription ID to set - + Returns: New AzureCredentials instance. """ @@ -163,18 +163,18 @@ def initialize(dct: dict, subscription_id: Optional[str]) -> "AzureCredentials": @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: """Deserialize credentials from config and cache. - + Loads Azure credentials from either the configuration dictionary or environment variables, with subscription ID retrieved from cache. - + Args: config: Configuration dictionary cache: Cache instance for storing/retrieving cached values handlers: Logging handlers for error reporting - + Returns: AzureCredentials instance with loaded configuration. - + Raises: RuntimeError: If no valid credentials are found in config or environment. """ @@ -207,7 +207,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden def serialize(self) -> dict: """Serialize credentials to dictionary. - + Returns: Dictionary containing serialized credential data. """ @@ -216,7 +216,7 @@ def serialize(self) -> dict: def update_cache(self, cache_client: Cache) -> None: """Update credentials in cache. - + Args: cache_client: Cache instance to update """ @@ -225,32 +225,32 @@ def update_cache(self, cache_client: Cache) -> None: class AzureResources(Resources): """Azure resource management for SeBS benchmarking. 
- + This class manages Azure cloud resources including storage accounts, resource groups, and CosmosDB accounts required for serverless function benchmarking. It handles resource allocation, caching, and lifecycle management. - + Attributes: _resource_group: Name of the Azure resource group _storage_accounts: List of storage accounts for function code _data_storage_account: Storage account for benchmark data _cosmosdb_account: CosmosDB account for NoSQL storage """ - + class Storage: """Azure Storage Account wrapper. - + Represents an Azure Storage Account with connection details for use in serverless function deployment and data storage. - + Attributes: account_name: Name of the Azure storage account connection_string: Connection string for accessing the storage account """ - + def __init__(self, account_name: str, connection_string: str) -> None: """Initialize Azure Storage account. - + Args: account_name: Name of the Azure storage account connection_string: Connection string for storage access @@ -262,14 +262,14 @@ def __init__(self, account_name: str, connection_string: str) -> None: @staticmethod def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": """Create Storage instance from cached data. - + Args: account_name: Name of the storage account connection_string: Connection string for the account - + Returns: New Storage instance with the provided details. - + Raises: AssertionError: If connection string is empty. """ @@ -279,11 +279,11 @@ def from_cache(account_name: str, connection_string: str) -> "AzureResources.Sto @staticmethod def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": """Create Storage instance from newly allocated account. - + Args: account_name: Name of the storage account cli_instance: Azure CLI instance for querying connection string - + Returns: New Storage instance with queried connection string. """ @@ -296,24 +296,24 @@ def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResource @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: """Query connection string for storage account from Azure. - + Args: account_name: Name of the storage account cli_instance: Azure CLI instance for executing queries - + Returns: Connection string for the storage account. """ ret = cli_instance.execute( "az storage account show-connection-string --name {}".format(account_name) ) - ret = json.loads(ret.decode("utf-8")) - connection_string = ret["connectionString"] + ret_dct = json.loads(ret.decode("utf-8")) + connection_string = ret_dct["connectionString"] return connection_string def serialize(self) -> dict: """Serialize storage account to dictionary. - + Returns: Dictionary containing storage account information. """ @@ -322,10 +322,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": """Deserialize storage account from dictionary. - + Args: obj: Dictionary containing storage account data - + Returns: New Storage instance from dictionary data. """ @@ -339,7 +339,7 @@ def __init__( cosmosdb_account: Optional[CosmosDBAccount] = None, ) -> None: """Initialize Azure resources. - + Args: resource_group: Name of Azure resource group storage_accounts: List of storage accounts for function code @@ -354,7 +354,7 @@ def __init__( def set_region(self, region: str) -> None: """Set the Azure region for resource allocation. 
- + Args: region: Azure region name (e.g., 'westus2') """ @@ -363,7 +363,7 @@ def set_region(self, region: str) -> None: @property def storage_accounts(self) -> List["AzureResources.Storage"]: """Get list of storage accounts for function code. - + Returns: List of Storage instances for function deployment. """ @@ -371,13 +371,13 @@ def storage_accounts(self) -> List["AzureResources.Storage"]: def resource_group(self, cli_instance: AzureCLI) -> str: """Get or create Azure resource group. - + Locates existing resource group or creates a new one with UUID-based name. The resource group is used to contain all SeBS-related Azure resources. - + Args: cli_instance: Azure CLI instance for resource operations - + Returns: Name of the resource group. """ @@ -403,15 +403,15 @@ def resource_group(self, cli_instance: AzureCLI) -> str: def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: """List SeBS resource groups in the current region. - + Queries Azure for existing resource groups that match the SeBS naming pattern. - + Args: cli_instance: Azure CLI instance for executing queries - + Returns: List of resource group names matching SeBS pattern. - + Raises: RuntimeError: If Azure CLI response cannot be parsed. """ @@ -431,14 +431,14 @@ def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True) -> None: """Delete Azure resource group. - + Removes the specified resource group and all contained resources. - + Args: cli_instance: Azure CLI instance for executing deletion name: Name of resource group to delete wait: Whether to wait for deletion to complete - + Raises: RuntimeError: If resource group deletion fails. """ @@ -453,16 +453,16 @@ def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: """Get or create CosmosDB account for NoSQL storage. - + Finds existing CosmosDB account or creates a new serverless one. Account names must be globally unique across Azure. - + Args: cli_instance: Azure CLI instance for CosmosDB operations - + Returns: CosmosDBAccount instance for NoSQL operations. - + Raises: RuntimeError: If CosmosDB account creation or parsing fails. """ @@ -507,15 +507,15 @@ def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: """List existing CosmosDB accounts in resource group. - + Queries for CosmosDB accounts matching the SeBS naming pattern. - + Args: cli_instance: Azure CLI instance for executing queries - + Returns: Dictionary mapping account names to document endpoints. - + Raises: RuntimeError: If Azure CLI response cannot be parsed. """ @@ -533,13 +533,13 @@ def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": """Get or create storage account for benchmark data. - + Retrieves existing or creates new storage account dedicated to storing benchmark input/output data. This is separate from function code storage. - + Args: cli_instance: Azure CLI instance for storage operations - + Returns: Storage instance for benchmark data operations. """ @@ -554,15 +554,15 @@ def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storag def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: """List storage accounts in the resource group. 
- + Queries for all storage accounts within the managed resource group. - + Args: cli_instance: Azure CLI instance for executing queries - + Returns: List of storage account names. - + Raises: RuntimeError: If Azure CLI response cannot be parsed. """ @@ -581,13 +581,13 @@ def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": """Create new storage account for function code. - + Creates a new storage account with a UUID-based name for storing function code packages and adds it to the managed accounts list. - + Args: cli_instance: Azure CLI instance for storage operations - + Returns: New Storage instance for function code storage. """ @@ -605,14 +605,14 @@ def _create_storage_account( self, cli_instance: AzureCLI, account_name: str ) -> "AzureResources.Storage": """Internal method to create storage account. - + Creates a new Azure storage account with the specified name. This method does NOT update cache or add to resource collections. - + Args: cli_instance: Azure CLI instance for storage operations account_name: Name for the new storage account - + Returns: New Storage instance for the created account. """ @@ -634,10 +634,10 @@ def _create_storage_account( def update_cache(self, cache_client: Cache) -> None: """Update resource configuration in cache. - + Persists current resource state including storage accounts, data storage accounts, and resource groups to filesystem cache. - + Args: cache_client: Cache instance for storing configuration """ @@ -647,9 +647,9 @@ def update_cache(self, cache_client: Cache) -> None: @staticmethod def initialize(res: Resources, dct: dict) -> None: """Initialize resources from dictionary data. - + Populates resource instance with data from configuration dictionary. - + Args: res: Resources instance to initialize dct: Dictionary containing resource configuration @@ -675,7 +675,7 @@ def initialize(res: Resources, dct: dict) -> None: def serialize(self) -> dict: """Serialize resources to dictionary. - + Returns: Dictionary containing all resource configuration data. """ @@ -693,14 +693,14 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: """Deserialize resources from config and cache. - + Loads Azure resources from cache if available, otherwise from configuration. - + Args: config: Configuration dictionary cache: Cache instance for retrieving cached values handlers: Logging handlers for error reporting - + Returns: AzureResources instance with loaded configuration. """ @@ -725,18 +725,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AzureConfig(Config): """Complete Azure configuration for SeBS benchmarking. - + Combines Azure credentials and resources into a single configuration object for managing Azure serverless function deployments. - + Attributes: _credentials: Azure service principal credentials _resources: Azure resource management instance """ - + def __init__(self, credentials: AzureCredentials, resources: AzureResources) -> None: """Initialize Azure configuration. - + Args: credentials: Azure service principal credentials resources: Azure resource management instance @@ -748,7 +748,7 @@ def __init__(self, credentials: AzureCredentials, resources: AzureResources) -> @property def credentials(self) -> AzureCredentials: """Get Azure credentials. - + Returns: AzureCredentials instance for authentication. 
""" @@ -757,7 +757,7 @@ def credentials(self) -> AzureCredentials: @property def resources(self) -> AzureResources: """Get Azure resources manager. - + Returns: AzureResources instance for resource management. """ @@ -766,7 +766,7 @@ def resources(self) -> AzureResources: @staticmethod def initialize(cfg: Config, dct: dict) -> None: """Initialize configuration from dictionary data. - + Args: cfg: Config instance to initialize dct: Dictionary containing configuration data @@ -777,15 +777,15 @@ def initialize(cfg: Config, dct: dict) -> None: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: """Deserialize complete Azure configuration. - + Creates AzureConfig instance from configuration dictionary and cache, combining credentials and resources with region information. - + Args: config: Configuration dictionary cache: Cache instance for storing/retrieving cached values handlers: Logging handlers for error reporting - + Returns: AzureConfig instance with complete Azure configuration. """ @@ -807,9 +807,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config def update_cache(self, cache: Cache) -> None: """Update complete configuration in cache. - + Persists region, credentials, and resources to filesystem cache. - + Args: cache: Cache instance for storing configuration """ @@ -819,7 +819,7 @@ def update_cache(self, cache: Cache) -> None: def serialize(self) -> dict: """Serialize complete configuration to dictionary. - + Returns: Dictionary containing all Azure configuration data. """ diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 37c2daaa..8cb48688 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -42,10 +42,10 @@ @dataclass class BenchmarkResources: """Resource container for benchmark-specific CosmosDB resources. - + This dataclass holds the database and container names allocated for a specific benchmark, along with the database client proxy. - + Attributes: database: Name of the CosmosDB database containers: List of container names for the benchmark @@ -59,7 +59,7 @@ class BenchmarkResources: def serialize(self) -> dict: """Serialize benchmark resources to dictionary. - + Returns: Dictionary containing database and container names. """ @@ -68,10 +68,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> "BenchmarkResources": """Deserialize benchmark resources from dictionary. - + Args: config: Dictionary containing resource configuration - + Returns: BenchmarkResources instance with restored configuration. """ diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 0a7eb740..a4501320 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -32,15 +32,15 @@ class AzureFunction(Function): """Azure Function implementation for SeBS benchmarking. - + This class represents an Azure Function with Azure-specific attributes and configuration. It includes storage account information and supports Azure-specific triggers like HTTP triggers. - + Attributes: function_storage: Azure Storage account used for function code storage """ - + def __init__( self, name: str, @@ -50,7 +50,7 @@ def __init__( cfg: FunctionConfig, ) -> None: """Initialize Azure Function. - + Args: name: Name of the Azure Function benchmark: Name of the benchmark this function implements @@ -63,7 +63,7 @@ def __init__( def serialize(self) -> dict: """Serialize function to dictionary. - + Returns: Dictionary containing function data including Azure-specific storage. 
""" @@ -75,16 +75,16 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: """Deserialize function from cached configuration. - + Recreates an AzureFunction instance from cached data including function configuration, storage account, and triggers. - + Args: cached_config: Dictionary containing cached function data - + Returns: AzureFunction instance with restored configuration. - + Raises: AssertionError: If unknown trigger type is encountered. """ diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 93f2bb72..0710fab4 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -34,18 +34,18 @@ class AzureTrigger(Trigger): """Base class for Azure Function triggers. - + This abstract base class provides common functionality for Azure Function triggers, including data storage account management for benchmark data handling. - + Attributes: _data_storage_account: Azure storage account for benchmark data """ - + def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None) -> None: """Initialize Azure trigger. - + Args: data_storage_account: Optional Azure storage account for data operations """ @@ -55,10 +55,10 @@ def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None @property def data_storage_account(self) -> AzureResources.Storage: """Get the data storage account. - + Returns: Azure storage account for benchmark data. - + Raises: AssertionError: If data storage account is not set. """ @@ -68,7 +68,7 @@ def data_storage_account(self) -> AzureResources.Storage: @data_storage_account.setter def data_storage_account(self, data_storage_account: AzureResources.Storage) -> None: """Set the data storage account. - + Args: data_storage_account: Azure storage account to set """ @@ -77,17 +77,19 @@ def data_storage_account(self, data_storage_account: AzureResources.Storage) -> class HTTPTrigger(AzureTrigger): """HTTP trigger for Azure Functions. - + This class implements HTTP-based invocation of Azure Functions, supporting both synchronous and asynchronous execution patterns for benchmarking. - + Attributes: url: HTTP endpoint URL for the Azure Function """ - - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None) -> None: + + def __init__( + self, url: str, data_storage_account: Optional[AzureResources.Storage] = None + ) -> None: """Initialize HTTP trigger. - + Args: url: HTTP endpoint URL for the Azure Function data_storage_account: Optional Azure storage account for data operations @@ -98,7 +100,7 @@ def __init__(self, url: str, data_storage_account: Optional[AzureResources.Stora @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type. - + Returns: HTTP trigger type identifier. """ @@ -106,12 +108,12 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: dict) -> ExecutionResult: """Synchronously invoke Azure Function via HTTP. - + Sends HTTP request to the function endpoint and waits for response. - + Args: payload: Dictionary payload to send to the function - + Returns: ExecutionResult containing response data and timing information. """ @@ -119,12 +121,12 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict) -> concurrent.futures.Future: """Asynchronously invoke Azure Function via HTTP. - + Submits function invocation to a thread pool for parallel execution. 
- + Args: payload: Dictionary payload to send to the function - + Returns: Future object that can be used to retrieve the result. """ @@ -134,7 +136,7 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: """Serialize trigger to dictionary. - + Returns: Dictionary containing trigger type and URL. """ @@ -143,10 +145,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: """Deserialize trigger from dictionary. - + Args: obj: Dictionary containing trigger data - + Returns: HTTPTrigger instance with restored configuration. """ diff --git a/sebs/benchmark.py b/sebs/benchmark.py index b64d8082..7fedfd5c 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -33,10 +33,10 @@ class BenchmarkConfig: """ Configuration for a benchmark in the Serverless Benchmarking Suite. - + This class stores the configuration parameters for a benchmark, including timeout, memory allocation, supported languages, and included modules. - + Attributes: timeout: Maximum execution time in seconds memory: Memory allocation in MB @@ -44,13 +44,13 @@ class BenchmarkConfig: modules: List of benchmark modules/features required """ - + def __init__( self, timeout: int, memory: int, languages: List["Language"], modules: List[BenchmarkModule] ): """ Initialize a benchmark configuration. - + Args: timeout: Maximum execution time in seconds memory: Memory allocation in MB @@ -66,7 +66,7 @@ def __init__( def timeout(self) -> int: """ Get the maximum execution time in seconds. - + Returns: int: The timeout value """ @@ -76,7 +76,7 @@ def timeout(self) -> int: def timeout(self, val: int): """ Set the maximum execution time in seconds. - + Args: val: The new timeout value """ @@ -86,7 +86,7 @@ def timeout(self, val: int): def memory(self) -> int: """ Get the memory allocation in MB. - + Returns: int: The memory allocation """ @@ -96,7 +96,7 @@ def memory(self) -> int: def memory(self, val: int): """ Set the memory allocation in MB. - + Args: val: The new memory allocation value """ @@ -106,7 +106,7 @@ def memory(self, val: int): def languages(self) -> List["Language"]: """ Get the list of supported programming languages. - + Returns: List[Language]: Supported programming languages """ @@ -116,7 +116,7 @@ def languages(self) -> List["Language"]: def modules(self) -> List[BenchmarkModule]: """ Get the list of benchmark modules/features required. - + Returns: List[BenchmarkModule]: Required benchmark modules """ @@ -126,10 +126,10 @@ def modules(self) -> List[BenchmarkModule]: def deserialize(json_object: dict) -> "BenchmarkConfig": """ Create a BenchmarkConfig instance from a JSON object. - + Args: json_object: Dictionary containing benchmark configuration - + Returns: BenchmarkConfig: A new instance with the deserialized data """ @@ -146,9 +146,9 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": class Benchmark(LoggingBase): """ Creates code package representing a benchmark with all code and assets. - + This class handles building, packaging, and deploying benchmark code for - serverless platforms. It manages dependencies installation within Docker + serverless platforms. It manages dependencies installation within Docker images corresponding to the target cloud deployment. The behavior of the class depends on cache state: @@ -156,7 +156,7 @@ class Benchmark(LoggingBase): 2. Otherwise, the hash of the entire benchmark is computed and compared with the cached value. If changed, it rebuilds the benchmark 3. 
Otherwise, it returns the path to cached code - + Attributes: benchmark: Name of the benchmark benchmark_path: Path to the benchmark directory @@ -183,7 +183,7 @@ class Benchmark(LoggingBase): def typename() -> str: """ Get the type name of this class. - + Returns: str: The type name """ @@ -193,7 +193,7 @@ def typename() -> str: def benchmark(self) -> str: """ Get the benchmark name. - + Returns: str: Name of the benchmark """ @@ -203,7 +203,7 @@ def benchmark(self) -> str: def benchmark_path(self) -> str: """ Get the path to the benchmark directory. - + Returns: str: Path to the benchmark directory """ @@ -213,7 +213,7 @@ def benchmark_path(self) -> str: def benchmark_config(self) -> BenchmarkConfig: """ Get the benchmark configuration. - + Returns: BenchmarkConfig: Configuration for the benchmark """ @@ -223,7 +223,7 @@ def benchmark_config(self) -> BenchmarkConfig: def code_package(self) -> dict: """ Get the code package information. - + Returns: dict: Dictionary with code package information """ @@ -233,7 +233,7 @@ def code_package(self) -> dict: def functions(self) -> Dict[str, Any]: """ Get the functions for this benchmark. - + Returns: Dict[str, Any]: Dictionary of functions """ @@ -243,7 +243,7 @@ def functions(self) -> Dict[str, Any]: def code_location(self) -> str: """ Get the location of the code package. - + Returns: str: Path to the code package """ @@ -256,7 +256,7 @@ def code_location(self) -> str: def is_cached(self) -> bool: """ Check if the benchmark is cached. - + Returns: bool: True if cached, False otherwise """ @@ -266,7 +266,7 @@ def is_cached(self) -> bool: def is_cached(self, val: bool): """ Set whether the benchmark is cached. - + Args: val: True if cached, False otherwise """ @@ -276,7 +276,7 @@ def is_cached(self, val: bool): def is_cached_valid(self) -> bool: """ Check if the cached benchmark is valid. - + Returns: bool: True if valid, False otherwise """ @@ -286,7 +286,7 @@ def is_cached_valid(self) -> bool: def is_cached_valid(self, val: bool): """ Set whether the cached benchmark is valid. - + Args: val: True if valid, False otherwise """ @@ -296,7 +296,7 @@ def is_cached_valid(self, val: bool): def code_size(self) -> int: """ Get the size of the code package in bytes. - + Returns: int: Size in bytes """ @@ -306,10 +306,10 @@ def code_size(self) -> int: def container_uri(self) -> str: """ Get the URI of the container for container deployments. - + Returns: str: Container URI - + Raises: AssertionError: If container URI is None """ @@ -320,7 +320,7 @@ def container_uri(self) -> str: def language(self) -> "Language": """ Get the programming language for the benchmark. - + Returns: Language: Programming language """ @@ -330,7 +330,7 @@ def language(self) -> "Language": def language_name(self) -> str: """ Get the name of the programming language. - + Returns: str: Name of the language """ @@ -340,7 +340,7 @@ def language_name(self) -> str: def language_version(self) -> str: """ Get the version of the programming language. - + Returns: str: Version of the language """ @@ -350,7 +350,7 @@ def language_version(self) -> str: def has_input_processed(self) -> bool: """ Check if input processing has been performed. - + Returns: bool: True if processed, False otherwise """ @@ -360,7 +360,7 @@ def has_input_processed(self) -> bool: def uses_storage(self) -> bool: """ Check if the benchmark uses cloud storage. 
- + Returns: bool: True if using storage, False otherwise """ @@ -370,7 +370,7 @@ def uses_storage(self) -> bool: def uses_nosql(self) -> bool: """ Check if the benchmark uses NoSQL databases. - + Returns: bool: True if using NoSQL, False otherwise """ @@ -380,7 +380,7 @@ def uses_nosql(self) -> bool: def architecture(self) -> str: """ Get the CPU architecture of the deployment target. - + Returns: str: Architecture name (e.g., 'x86_64', 'arm64') """ @@ -390,7 +390,7 @@ def architecture(self) -> str: def container_deployment(self) -> bool: """ Check if using container deployment. - + Returns: bool: True if using container deployment, False otherwise """ @@ -400,10 +400,10 @@ def container_deployment(self) -> bool: def hash(self) -> str: """ Get the hash of the benchmark code. - + Computes an MD5 hash of the benchmark directory to determine if the code has changed since the last build. - + Returns: str: MD5 hash as a hexadecimal string """ @@ -415,9 +415,9 @@ def hash(self) -> str: def hash(self, val: str): """ Set the hash of the benchmark code. - + Used only for testing purposes. - + Args: val: MD5 hash as a hexadecimal string """ @@ -435,11 +435,11 @@ def __init__( ): """ Initialize a Benchmark instance. - + Sets up a benchmark for a specific deployment platform, including configuration, language runtime, and caching. Loads the benchmark configuration from the JSON file and validates the language support. - + Args: benchmark: Name of the benchmark deployment_name: Name of the deployment platform (e.g., 'aws', 'azure') @@ -448,7 +448,7 @@ def __init__( output_dir: Directory for output files cache_client: Cache client for caching code packages docker_client: Docker client for building dependencies - + Raises: RuntimeError: If the benchmark is not found or doesn't support the language """ @@ -503,16 +503,16 @@ def __init__( def hash_directory(directory: str, deployment: str, language: str) -> str: """ Compute MD5 hash of an entire directory. - + Calculates a hash of the benchmark source code by combining hashes of all relevant files. This includes language-specific files, deployment wrappers, and shared files like shell scripts and JSON configuration. - + Args: directory: Path to the directory to hash deployment: Name of the deployment platform language: Programming language name - + Returns: str: MD5 hash as a hexadecimal string """ @@ -542,7 +542,7 @@ def hash_directory(directory: str, deployment: str, language: str) -> str: def serialize(self) -> dict: """ Serialize the benchmark to a dictionary. - + Returns: dict: Dictionary containing size and hash of the benchmark code """ @@ -551,7 +551,7 @@ def serialize(self) -> dict: def query_cache(self) -> None: """ Query the cache for existing benchmark code packages and functions. - + Checks if there's a cached code package or container for this benchmark and deployment combination. Updates the cache status fields based on whether the cache exists and if it's still valid (hash matches). 
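The benchmark docstrings above describe the cache-validation flow: the benchmark directory is hashed, and the digest is compared against the cached value to decide whether the code package must be rebuilt. The Python sketch below illustrates that idea only; the helper name, the file patterns, and the comparison line are illustrative assumptions and do not reproduce the exact file-selection rules of Benchmark.hash_directory() or the cache layout consulted by query_cache().

import glob
import hashlib
import os


def hash_directory_sketch(directory: str, patterns=("*.py", "*.json", "*.sh")) -> str:
    # Combine per-file MD5 digests into a single digest that changes whenever
    # any relevant benchmark file changes (hypothetical file selection).
    digest = hashlib.md5()
    for pattern in patterns:
        for path in sorted(glob.glob(os.path.join(directory, pattern))):
            with open(path, "rb") as f:
                digest.update(f.read())
    return digest.hexdigest()


# Usage (hypothetical): rebuild the code package only when the recomputed
# digest differs from the value stored in the cache.
# rebuild_needed = hash_directory_sketch("benchmarks/110.dynamic-html/python") != cached_hash
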
diff --git a/sebs/cache.py b/sebs/cache.py index 185f0d9b..d58eac73 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -23,7 +23,7 @@ import os import shutil import threading -from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING # noqa +from typing import Any, Callable, Dict, List, Mapping, Optional, TYPE_CHECKING # noqa from sebs.utils import LoggingBase, serialize @@ -32,16 +32,16 @@ from sebs.faas.function import Function -def update(d: Dict[str, Any], u: Dict[str, Any]) -> Dict[str, Any]: +def update(d: Dict[str, Any], u: Mapping[str, Any]) -> Dict[str, Any]: """Recursively update nested dictionary with another dictionary. - + This function performs deep merge of two dictionaries, updating nested dictionary values rather than replacing them entirely. - + Args: d (Dict[str, Any]): The target dictionary to update. - u (Dict[str, Any]): The source dictionary with updates. - + u (Mapping[str, Any]): The source dictionary with updates. + Returns: Dict[str, Any]: The updated dictionary. """ @@ -55,15 +55,16 @@ def update(d: Dict[str, Any], u: Dict[str, Any]) -> Dict[str, Any]: def update_dict(cfg: Dict[str, Any], val: Any, keys: List[str]) -> None: """Update dictionary value at nested key path. - + Updates a nested dictionary by setting a value at a path specified by a list of keys. Creates intermediate dictionaries as needed. - + Args: cfg (Dict[str, Any]): The dictionary to update. val (Any): The value to set at the key path. keys (List[str]): List of keys forming the path to the target location. """ + def map_keys(obj: Dict[str, Any], val: Any, keys: List[str]) -> Dict[str, Any]: if len(keys): return {keys[0]: map_keys(obj, val, keys[1:])} @@ -75,12 +76,12 @@ def map_keys(obj: Dict[str, Any], val: Any, keys: List[str]) -> Dict[str, Any]: class Cache(LoggingBase): """Persistent caching system for SeBS benchmark configurations and deployments. - + This class provides comprehensive caching functionality for SeBS benchmarks, including configuration management, code package storage, function tracking, and cloud resource management. It uses a file-based cache system with thread-safe operations. - + Attributes: cached_config (Dict[str, Any]): In-memory cache of cloud configurations. config_updated (bool): Flag indicating if configuration needs to be saved. @@ -89,17 +90,17 @@ class Cache(LoggingBase): ignore_storage (bool): Flag to skip storage resource caching. docker_client (docker.DockerClient): Docker client for container operations. """ - + cached_config: Dict[str, Any] = {} config_updated: bool = False def __init__(self, cache_dir: str, docker_client: docker.DockerClient) -> None: """Initialize the Cache with directory and Docker client. - + Sets up the cache directory structure and loads existing configurations. Creates the cache directory if it doesn't exist, otherwise loads existing cached configurations. - + Args: cache_dir (str): Path to the cache directory. docker_client (docker.DockerClient): Docker client for container operations. @@ -118,7 +119,7 @@ def __init__(self, cache_dir: str, docker_client: docker.DockerClient) -> None: @staticmethod def typename() -> str: """Get the typename for this cache. - + Returns: str: The cache type name. """ @@ -126,7 +127,7 @@ def typename() -> str: def load_config(self) -> None: """Load cached cloud configurations from disk. - + Reads configuration files for all supported cloud platforms from the cache directory and loads them into memory. 
""" @@ -139,10 +140,10 @@ def load_config(self) -> None: def get_config(self, cloud: str) -> Optional[Dict[str, Any]]: """Get cached configuration for a specific cloud provider. - + Args: cloud (str): Cloud provider name (e.g., 'aws', 'azure', 'gcp'). - + Returns: Optional[Dict[str, Any]]: The cached configuration or None if not found. """ @@ -150,11 +151,11 @@ def get_config(self, cloud: str) -> Optional[Dict[str, Any]]: def update_config(self, val: Any, keys: List[str]) -> None: """Update configuration values at nested key path. - + Updates cached configuration by setting a value at the specified nested key path. Sets the config_updated flag to ensure changes are persisted to disk. - + Args: val (Any): New value to store. keys (List[str]): Array of consecutive keys for multi-level dictionary. @@ -173,7 +174,7 @@ def unlock(self) -> None: def shutdown(self) -> None: """Save cached configurations to disk if they were updated. - + Writes all updated cloud configurations back to their respective JSON files in the cache directory. """ @@ -187,11 +188,11 @@ def shutdown(self) -> None: def get_benchmark_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: """Access cached configuration of a benchmark. - + Args: deployment (str): Deployment platform ('aws', 'azure', 'gcp', 'openwhisk', 'local'). benchmark (str): Benchmark name (e.g., '110.dynamic-html'). - + Returns: Optional[Dict[str, Any]]: Benchmark configuration or None if not found. """ @@ -213,14 +214,14 @@ def get_code_package( architecture: str, ) -> Optional[Dict[str, Any]]: """Access cached version of benchmark code package. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. language (str): Programming language. language_version (str): Language version. architecture (str): Target architecture. - + Returns: Optional[Dict[str, Any]]: Code package configuration or None if not found. """ @@ -241,14 +242,14 @@ def get_container( architecture: str, ) -> Optional[Dict[str, Any]]: """Access cached container configuration for a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. language (str): Programming language. language_version (str): Language version. architecture (str): Target architecture. - + Returns: Optional[Dict[str, Any]]: Container configuration or None if not found. """ @@ -264,12 +265,12 @@ def get_functions( self, deployment: str, benchmark: str, language: str ) -> Optional[Dict[str, Any]]: """Get cached function configurations for a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. language (str): Programming language. - + Returns: Optional[Dict[str, Any]]: Function configurations or None if not found. """ @@ -281,11 +282,11 @@ def get_functions( def get_storage_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: """Access cached storage configuration of a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. - + Returns: Optional[Dict[str, Any]]: Storage configuration or None if not found. """ @@ -293,24 +294,26 @@ def get_storage_config(self, deployment: str, benchmark: str) -> Optional[Dict[s def get_nosql_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: """Access cached NoSQL configuration of a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. - + Returns: Optional[Dict[str, Any]]: NoSQL configuration or None if not found. 
""" return self._get_resource_config(deployment, benchmark, "nosql") - def _get_resource_config(self, deployment: str, benchmark: str, resource: str) -> Optional[Dict[str, Any]]: + def _get_resource_config( + self, deployment: str, benchmark: str, resource: str + ) -> Optional[Dict[str, Any]]: """Get cached resource configuration for a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. resource (str): Resource type ('storage' or 'nosql'). - + Returns: Optional[Dict[str, Any]]: Resource configuration or None if not found. """ @@ -319,7 +322,7 @@ def _get_resource_config(self, deployment: str, benchmark: str, resource: str) - def update_storage(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: """Update cached storage configuration for a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. @@ -332,7 +335,7 @@ def update_storage(self, deployment: str, benchmark: str, config: Dict[str, Any] def update_nosql(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: """Update cached NoSQL configuration for a benchmark. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. @@ -342,13 +345,15 @@ def update_nosql(self, deployment: str, benchmark: str, config: Dict[str, Any]) return self._update_resources(deployment, benchmark, "nosql", config) - def _update_resources(self, deployment: str, benchmark: str, resource: str, config: Dict[str, Any]) -> None: + def _update_resources( + self, deployment: str, benchmark: str, resource: str, config: Dict[str, Any] + ) -> None: """Update cached resource configuration for a benchmark. - + This method handles caching of resource configurations (storage, nosql) for benchmarks. It creates the benchmark directory if it doesn't exist and updates the configuration file. - + Args: deployment (str): Deployment platform name. benchmark (str): Benchmark name. @@ -383,14 +388,14 @@ def add_code_package( code_package: "Benchmark", ) -> None: """Add a new code package to the cache. - + Caches a compiled benchmark code package (either directory or ZIP file) along with its configuration. Handles both package and container deployments. - + Args: deployment_name (str): Name of the deployment platform. code_package (Benchmark): The benchmark code package to cache. - + Raises: RuntimeError: If cached application already exists for the deployment. """ @@ -503,10 +508,10 @@ def update_code_package( code_package: "Benchmark", ) -> None: """Update an existing code package in the cache. - + Updates cached code package with new content and metadata. If the cached package doesn't exist, adds it as a new package. - + Args: deployment_name (str): Name of the deployment platform. code_package (Benchmark): The benchmark code package to update. @@ -579,16 +584,16 @@ def add_function( function: "Function", ) -> None: """Add new function to cache. - + Caches a deployed function configuration for a benchmark. Links the function to its corresponding code package. - + Args: deployment_name (str): Name of the deployment platform. language_name (str): Programming language name. code_package (Benchmark): The benchmark code package. function (Function): The deployed function to cache. - + Raises: RuntimeError: If code package doesn't exist in cache. """ @@ -620,13 +625,13 @@ def add_function( def update_function(self, function: "Function") -> None: """Update an existing function in the cache. 
- + Updates cached function configuration with new metadata. Searches across all deployments and languages to find the function by name. - + Args: function (Function): The function with updated configuration. - + Raises: RuntimeError: If function's code package doesn't exist in cache. """ diff --git a/sebs/config.py b/sebs/config.py index 6a8d5b56..d3cd388b 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -17,22 +17,22 @@ class SeBSConfig: """Central configuration manager for SeBS framework. - + This class manages all configuration settings for the SeBS benchmarking suite, including system configurations, Docker settings, deployment options, and platform-specific parameters. It loads configuration from systems.json and provides convenient access methods for various configuration aspects. - + Attributes: _system_config (Dict): The loaded system configuration from systems.json. _image_tag_prefix (str): Custom prefix for Docker image tags. """ - + def __init__(self) -> None: """Initialize SeBSConfig by loading system configuration. - + Loads the systems.json configuration file and initializes the image tag prefix. - + Raises: FileNotFoundError: If systems.json configuration file is not found. json.JSONDecodeError: If systems.json contains invalid JSON. @@ -44,7 +44,7 @@ def __init__(self) -> None: @property def image_tag_prefix(self) -> str: """Get the current Docker image tag prefix. - + Returns: str: The current image tag prefix. """ @@ -53,7 +53,7 @@ def image_tag_prefix(self) -> str: @image_tag_prefix.setter def image_tag_prefix(self, tag: str) -> None: """Set the Docker image tag prefix. - + Args: tag (str): The prefix to use for Docker image tags. """ @@ -61,7 +61,7 @@ def image_tag_prefix(self, tag: str) -> None: def docker_repository(self) -> str: """Get the Docker repository name from configuration. - + Returns: str: The Docker repository name configured in systems.json. """ @@ -69,11 +69,11 @@ def docker_repository(self) -> str: def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: """Get deployment packages for a specific deployment and language. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). - + Returns: Dict[str, str]: Dictionary mapping package names to their versions. """ @@ -85,11 +85,11 @@ def deployment_module_packages( self, deployment_name: str, language_name: str ) -> Dict[str, str]: """Get deployment module packages for a specific deployment and language. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). - + Returns: Dict[str, str]: Dictionary mapping module package names to their versions. """ @@ -99,11 +99,11 @@ def deployment_module_packages( def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: """Get deployment files list for a specific deployment and language. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). - + Returns: List[str]: List of required deployment files. """ @@ -113,11 +113,11 @@ def deployment_files(self, deployment_name: str, language_name: str) -> List[str def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: """Get available Docker image types for a deployment and language. 
- + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). - + Returns: List[str]: List of available Docker image types. """ @@ -127,12 +127,12 @@ def supported_language_versions( self, deployment_name: str, language_name: str, architecture: str ) -> List[str]: """Get supported language versions for a deployment, language, and architecture. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). architecture (str): Target architecture (e.g., 'x64', 'arm64'). - + Returns: List[str]: List of supported language versions. """ @@ -142,10 +142,10 @@ def supported_language_versions( def supported_architecture(self, deployment_name: str) -> List[str]: """Get supported architectures for a deployment platform. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). - + Returns: List[str]: List of supported architectures (e.g., ['x64', 'arm64']). """ @@ -153,10 +153,10 @@ def supported_architecture(self, deployment_name: str) -> List[str]: def supported_package_deployment(self, deployment_name: str) -> bool: """Check if package-based deployment is supported for a platform. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). - + Returns: bool: True if package deployment is supported, False otherwise. """ @@ -164,10 +164,10 @@ def supported_package_deployment(self, deployment_name: str) -> bool: def supported_container_deployment(self, deployment_name: str) -> bool: """Check if container-based deployment is supported for a platform. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). - + Returns: bool: True if container deployment is supported, False otherwise. """ @@ -177,12 +177,12 @@ def benchmark_base_images( self, deployment_name: str, language_name: str, architecture: str ) -> Dict[str, str]: """Get base Docker images for benchmarks on a specific platform. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). architecture (str): Target architecture (e.g., 'x64', 'arm64'). - + Returns: Dict[str, str]: Dictionary mapping language versions to base image names. """ @@ -192,7 +192,7 @@ def benchmark_base_images( def version(self) -> str: """Get the SeBS framework version. - + Returns: str: The SeBS version string, or 'unknown' if not configured. """ @@ -208,7 +208,7 @@ def benchmark_image_name( registry: Optional[str] = None, ) -> str: """Generate full Docker image name for a benchmark. - + Args: system (str): Deployment system name (e.g., 'aws', 'azure'). benchmark (str): Benchmark name (e.g., '110.dynamic-html'). @@ -216,7 +216,7 @@ def benchmark_image_name( language_version (str): Language version (e.g., '3.8'). architecture (str): Target architecture (e.g., 'x64'). registry (Optional[str]): Docker registry URL. If None, uses default repository. - + Returns: str: Complete Docker image name including registry and tag. """ @@ -238,17 +238,17 @@ def benchmark_image_tag( architecture: str, ) -> str: """Generate Docker image tag for a benchmark. - + Creates a standardized tag format that includes system, benchmark, language, version, architecture, optional prefix, and SeBS version. - + Args: system (str): Deployment system name (e.g., 'aws', 'azure'). 
benchmark (str): Benchmark name (e.g., '110.dynamic-html'). language_name (str): Programming language name (e.g., 'python'). language_version (str): Language version (e.g., '3.8'). architecture (str): Target architecture (e.g., 'x64'). - + Returns: str: Generated Docker image tag. """ @@ -261,11 +261,11 @@ def benchmark_image_tag( def username(self, deployment_name: str, language_name: str) -> str: """Get the username for a specific deployment and language configuration. - + Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). language_name (str): Programming language name (e.g., 'python', 'nodejs'). - + Returns: str: The username configured for the deployment and language combination. """ diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index fa516626..2a747c69 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -19,11 +19,11 @@ class Config: """Configuration class for benchmark experiments. - + This class manages the configuration settings for benchmark experiments, including runtime environment, architecture, deployment type, and experiment-specific settings. - + Attributes: _update_code: Whether to update function code _update_storage: Whether to update storage resources @@ -34,7 +34,7 @@ class Config: _experiment_configs: Dictionary of experiment-specific settings _runtime: Runtime environment (language and version) """ - + def __init__(self): """Initialize a new experiment configuration with default values.""" self._update_code: bool = False @@ -49,7 +49,7 @@ def __init__(self): @property def update_code(self) -> bool: """Get whether to update function code. - + Returns: True if function code should be updated, False otherwise """ @@ -58,7 +58,7 @@ def update_code(self) -> bool: @update_code.setter def update_code(self, val: bool): """Set whether to update function code. - + Args: val: True if function code should be updated, False otherwise """ @@ -67,7 +67,7 @@ def update_code(self, val: bool): @property def update_storage(self) -> bool: """Get whether to update storage resources. - + Returns: True if storage resources should be updated, False otherwise """ @@ -75,10 +75,10 @@ def update_storage(self) -> bool: def check_flag(self, key: str) -> bool: """Check if a flag is set. - + Args: key: Name of the flag to check - + Returns: Value of the flag, or False if the flag is not set """ @@ -87,7 +87,7 @@ def check_flag(self, key: str) -> bool: @property def runtime(self) -> Runtime: """Get the runtime environment. - + Returns: Runtime environment (language and version) """ @@ -96,7 +96,7 @@ def runtime(self) -> Runtime: @property def architecture(self) -> str: """Get the CPU architecture. - + Returns: CPU architecture (e.g., "x64", "arm64") """ @@ -105,7 +105,7 @@ def architecture(self) -> str: @property def container_deployment(self) -> bool: """Get whether to use container-based deployment. - + Returns: True if container-based deployment should be used, False otherwise """ @@ -113,13 +113,13 @@ def container_deployment(self) -> bool: def experiment_settings(self, name: str) -> dict: """Get settings for a specific experiment. - + Args: name: Name of the experiment - + Returns: Dictionary of experiment-specific settings - + Raises: KeyError: If the experiment name is not found in the configuration """ @@ -127,10 +127,10 @@ def experiment_settings(self, name: str) -> dict: def serialize(self) -> dict: """Serialize the configuration to a dictionary. 
- + This method converts the configuration object to a dictionary that can be saved to a file or passed to other components. - + Returns: Dictionary representation of the configuration """ @@ -150,17 +150,17 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> "Config": """Deserialize a configuration from a dictionary. - + This method creates a new configuration object from a dictionary representation, which may have been loaded from a file or passed from another component. - + Args: config: Dictionary representation of the configuration - + Returns: A new configuration object with settings from the dictionary - + Note: This method requires Python 3.7+ for proper type annotations. The string type annotation is a forward reference to the Config class. diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index ec30bab4..8271acee 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -23,11 +23,11 @@ class ExperimentEnvironment: """Environment management for benchmark experiments. - + This class provides methods to control CPU settings, memory management, and other system configurations that can affect benchmark results. It focuses on creating a stable, reproducible environment for experiments. - + Attributes: _cpu_mapping: Dictionary mapping physical cores to logical cores _vendor: CPU vendor identifier (currently only "intel" supported) @@ -35,13 +35,14 @@ class ExperimentEnvironment: _prev_boost_status: Previous boost status for restoration _prev_min_freq: Previous minimum frequency setting for restoration """ + def __init__(self) -> None: """Initialize the experiment environment. - + Discovers CPU topology, checks vendor compatibility, and verifies the CPU frequency scaling driver. Currently only supports Intel CPUs with the intel_pstate driver. - + Raises: NotImplementedError: If CPU vendor is not Intel or scaling driver is not intel_pstate @@ -90,7 +91,7 @@ def __init__(self) -> None: def write_cpu_status(self, cores: List[int], status: int) -> None: """Write CPU online status for specified cores. - + Args: cores: List of physical core IDs to modify status: Status to set (0 for offline, 1 for online) @@ -108,7 +109,7 @@ def write_cpu_status(self, cores: List[int], status: int) -> None: def disable_hyperthreading(self, cores: List[int]) -> None: """Disable hyperthreading for specified cores. - + Args: cores: List of physical core IDs to disable hyperthreading for """ @@ -116,7 +117,7 @@ def disable_hyperthreading(self, cores: List[int]) -> None: def enable_hyperthreading(self, cores: List[int]) -> None: """Enable hyperthreading for specified cores. - + Args: cores: List of physical core IDs to enable hyperthreading for """ @@ -124,10 +125,10 @@ def enable_hyperthreading(self, cores: List[int]) -> None: def disable_boost(self, cores: List[int]) -> None: """Disable CPU boost (turbo) for specified cores. - + Args: cores: List of physical core IDs to disable boost for - + Raises: NotImplementedError: If CPU governor is not intel_pstate """ @@ -140,12 +141,12 @@ def disable_boost(self, cores: List[int]) -> None: def enable_boost(self, cores: List[int]) -> None: """Enable CPU boost (turbo) for specified cores. - + Restores the previous boost status that was saved when boost was disabled. 
- + Args: cores: List of physical core IDs to enable boost for - + Raises: NotImplementedError: If CPU governor is not intel_pstate """ @@ -161,7 +162,7 @@ def enable_boost(self, cores: List[int]) -> None: def drop_page_cache(self) -> None: """Drop system page cache to ensure clean memory state. - + This method clears the page cache to prevent cached data from affecting benchmark measurements. """ @@ -169,7 +170,7 @@ def drop_page_cache(self) -> None: def set_frequency(self, max_freq: int) -> None: """Set minimum CPU frequency percentage. - + Args: max_freq: Minimum frequency percentage (0-100) """ @@ -179,7 +180,7 @@ def set_frequency(self, max_freq: int) -> None: def unset_frequency(self) -> None: """Restore previous minimum CPU frequency setting. - + Restores the frequency setting that was saved when set_frequency was called. """ @@ -188,14 +189,14 @@ def unset_frequency(self) -> None: def setup_benchmarking(self, cores: List[int]) -> None: """Set up the environment for stable benchmarking. - + This method applies a standard set of optimizations to create a stable environment for benchmarking: - Disables CPU boost/turbo - Disables hyperthreading - Sets CPU frequency to maximum - Drops page cache - + Args: cores: List of physical core IDs to configure """ @@ -206,13 +207,13 @@ def setup_benchmarking(self, cores: List[int]) -> None: def after_benchmarking(self, cores: List[int]) -> None: """Restore environment settings after benchmarking. - + This method restores the system to its previous state after benchmarking is complete: - Re-enables CPU boost/turbo - Re-enables hyperthreading - Restores frequency settings - + Args: cores: List of physical core IDs to restore """ diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index dc3c231c..1d70d0b6 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -30,16 +30,16 @@ class EvictionModel(Experiment): """Container eviction model experiment. - - This experiment measures how serverless platforms manage function - container eviction. It determines how long idle containers are kept - alive before being recycled by the platform, which affects cold start + + This experiment measures how serverless platforms manage function + container eviction. It determines how long idle containers are kept + alive before being recycled by the platform, which affects cold start frequency. - + The experiment invokes functions at different time intervals (defined in the 'times' list) and observes when cold starts occur, thus inferring the platform's container caching and eviction policies. - + Attributes: times: List of time intervals (in seconds) between invocations _function: Function to invoke @@ -52,7 +52,7 @@ class EvictionModel(Experiment): # Time intervals (in seconds) between invocations # Uncomment additional intervals as needed for longer tests times = [ - 1, # 1 second + 1, # 1 second # 2, # 2 seconds # 4, # 4 seconds # 8, # 8 seconds @@ -77,7 +77,7 @@ class EvictionModel(Experiment): def __init__(self, config: ExperimentConfig): """Initialize a new EvictionModel experiment. - + Args: config: Experiment configuration """ @@ -86,7 +86,7 @@ def __init__(self, config: ExperimentConfig): @staticmethod def name() -> str: """Get the name of the experiment. - + Returns: The name "eviction-model" """ @@ -95,7 +95,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of the experiment. 
- + Returns: The type name "Experiment.EvictionModel" """ @@ -104,12 +104,12 @@ def typename() -> str: @staticmethod def accept_replies(port: int, invocations: int) -> None: """Accept TCP connections from functions and respond to them. - + This static method acts as a TCP server, accepting connections from functions and responding to them. It runs two rounds of connection acceptance to ensure functions receive a response. The method logs all activity to a file. - + Args: port: TCP port to listen on invocations: Number of expected function invocations @@ -153,23 +153,25 @@ def accept_replies(port: int, invocations: int) -> None: s.close() @staticmethod - def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict) -> dict: + def execute_instance( + sleep_time: int, pid: int, tid: int, func: Function, payload: dict + ) -> dict: """Execute a single instance of the eviction model test. - + This method performs two invocations of a function with a sleep interval between them. The first invocation should be a cold start, and the second will indicate whether the container was evicted during the sleep period. - + Args: sleep_time: Time to sleep between invocations (seconds) pid: Process ID for logging tid: Thread ID for logging func: Function to invoke payload: Payload to send to the function - + Returns: Dictionary with invocation results and timing information - + Raises: RuntimeError: If the first invocation fails """ @@ -218,11 +220,11 @@ def process_function( payload: dict, ) -> List[dict]: """Process a function with multiple time intervals. - + This method executes multiple functions with different sleep times in parallel, starting with the largest sleep time to overlap executions. The total time should be equal to the maximum execution time. - + Args: repetition: Current repetition number pid: Process ID for logging @@ -230,10 +232,10 @@ def process_function( functions: List of functions to invoke times: List of sleep times corresponding to functions payload: Payload to send to functions - + Returns: List of dictionaries containing invocation results - + Raises: RuntimeError: If any execution fails """ @@ -275,12 +277,12 @@ def process_function( def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. - - This method sets up the benchmark, functions, and output directory for + + This method sets up the benchmark, functions, and output directory for the experiment. It creates a separate function for each time interval and copy combination, allowing for parallel testing of different eviction times. 
- + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use @@ -291,7 +293,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: ) self._deployment_client = deployment_client self._result = ExperimentResult(self.config, deployment_client.config) - + # Create function names for each time interval and copy name = deployment_client.default_function_name(self._benchmark) self.functions_names = [ @@ -299,12 +301,12 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: for time in self.times for copy in range(self.function_copies_per_time) ] - + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "eviction-model") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) - + self.functions = [] for fname in self.functions_names: @@ -315,12 +317,12 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: def run(self) -> None: """Execute the eviction model experiment. - + This method runs the main eviction model experiment by: 1. Setting up server instances to handle function responses 2. Executing parallel invocations with different sleep times 3. Collecting and storing results - + The experiment determines container eviction patterns by measuring whether functions experience cold starts after different idle periods. """ diff --git a/sebs/experiments/experiment.py b/sebs/experiments/experiment.py index 34e775d5..86c28732 100644 --- a/sebs/experiments/experiment.py +++ b/sebs/experiments/experiment.py @@ -24,22 +24,22 @@ class Experiment(ABC, LoggingBase): """Abstract base class for all serverless benchmark experiments. - + This class provides the common functionality and interface for all experiment implementations. It manages configuration, handles logging, and defines the abstract methods that must be implemented by specific experiment types. - + Attributes: config: Experiment configuration settings _threads: Number of concurrent threads to use for the experiment _invocations: Number of function invocations to perform _invocation_barrier: Semaphore for coordinating parallel invocations """ - + def __init__(self, cfg: ExperimentConfig): """Initialize a new experiment. - + Args: cfg: Experiment configuration settings """ @@ -52,7 +52,7 @@ def __init__(self, cfg: ExperimentConfig): @property def config(self) -> ExperimentConfig: """Get the experiment configuration. - + Returns: The experiment configuration """ @@ -62,11 +62,11 @@ def config(self) -> ExperimentConfig: @abstractmethod def name() -> str: """Get the name of the experiment. - + This method must be implemented by all subclasses to return a unique name for the experiment type, which is used for configuration and identification. - + Returns: A string name for the experiment """ @@ -76,11 +76,11 @@ def name() -> str: @abstractmethod def typename() -> str: """Get the type name of the experiment. - + This method must be implemented by all subclasses to return a human-readable type name for the experiment, which is used for display and reporting. - + Returns: A string type name for the experiment """ diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index dfeb5e39..f5e3d70c 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -31,11 +31,11 @@ class CodePackageSize: """Helper class for code package size experiments. 
- + This class handles creating and deploying functions with different code package sizes to measure the impact of package size on deployment and invocation overhead. - + Attributes: _benchmark_path: Path to the benchmark code _benchmark: Benchmark instance @@ -43,10 +43,10 @@ class CodePackageSize: sizes: List of code package sizes to test functions: Dictionary mapping size to function instances """ - + def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings: dict): """Initialize a new code package size experiment. - + Args: deployment_client: Deployment client to use benchmark: Benchmark instance @@ -78,7 +78,7 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings def before_sample(self, size: int, input_benchmark: dict) -> None: """Prepare the benchmark with a specific code package size. - + Args: size: Size of the code package to create input_benchmark: Benchmark input configuration (unused) @@ -91,16 +91,17 @@ def before_sample(self, size: int, input_benchmark: dict) -> None: class PayloadSize: """Helper class for payload size experiments. - + This class handles creating different payload sizes to measure the impact of input data size on function invocation overhead. - + Attributes: pts: List of payload sizes to test """ + def __init__(self, settings: dict) -> None: """Initialize a new payload size experiment. - + Args: settings: Experiment settings with payload_begin, payload_end, and payload_points values @@ -116,7 +117,7 @@ def __init__(self, settings: dict) -> None: def before_sample(self, size: int, input_benchmark: dict) -> None: """Prepare the benchmark input with a specific payload size. - + Args: size: Size of the payload to create input_benchmark: Benchmark input configuration to modify @@ -131,11 +132,11 @@ def before_sample(self, size: int, input_benchmark: dict) -> None: class InvocationOverhead(Experiment): """Invocation overhead measurement experiment. - + This experiment measures the overhead associated with invoking serverless functions. It can measure the impact of code package size, input data size, and different invocation methods on performance. - + Attributes: settings: Experiment-specific settings _benchmark: Benchmark to use @@ -147,10 +148,10 @@ class InvocationOverhead(Experiment): _deployment_client: Deployment client to use _sebs_client: SeBS client """ - + def __init__(self, config: ExperimentConfig): """Initialize a new InvocationOverhead experiment. - + Args: config: Experiment configuration """ @@ -159,11 +160,11 @@ def __init__(self, config: ExperimentConfig): def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. - + This method sets up the benchmark, function, storage, and output directory for the experiment. It uses the clock-synchronization benchmark as a base and prepares the necessary resources for measuring invocation overhead. - + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use @@ -181,7 +182,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: self.benchmark_input = self._benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) - + # Get storage for testing self._storage = deployment_client.system_resources.get_storage(replace_existing=True) @@ -205,7 +206,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: def run(self) -> None: """Execute the invocation overhead experiment. 
- + This method runs the main experiment by: 1. Setting up either code package size or payload size experiments 2. Running warm-up and cold start invocations @@ -286,12 +287,12 @@ def process( extend_time_interval: int, ) -> None: """Process experiment results and generate summary statistics. - + This method processes the raw experiment results by: 1. Loading timing data from CSV files 2. Computing clock drift and round-trip time 3. Creating a processed results file with invocation times - + Args: sebs_client: SeBS client instance deployment_client: Deployment client instance @@ -359,24 +360,26 @@ def process( invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) - def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str) -> List: + def receive_datagrams( + self, input_benchmark: dict, repetitions: int, port: int, ip: str + ) -> List: """Receive UDP datagrams from the function for clock synchronization. - + This method implements a UDP server that communicates with the function to measure clock synchronization and network timing. It receives datagrams from the function and responds to them, measuring timing information. - + Args: input_benchmark: Benchmark input configuration repetitions: Number of repetitions to perform port: UDP port to listen on ip: IP address of the client - + Returns: List containing invocation results: [is_cold, connection_time, start_timestamp, finish_timestamp, request_id] - + Raises: RuntimeError: If function invocation fails """ @@ -449,7 +452,7 @@ def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, @staticmethod def name() -> str: """Get the name of the experiment. - + Returns: The name "invocation-overhead" """ @@ -458,7 +461,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of the experiment. - + Returns: The type name "Experiment.InvocOverhead" """ diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index a65cf521..2f3f5672 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -34,12 +34,12 @@ class NetworkPingPong(Experiment): """Network latency and throughput measurement experiment. - + This experiment measures the network performance characteristics between the client, serverless functions, and storage services. It can measure ping-pong latency and throughput with different payload sizes and concurrency levels. - + Attributes: benchmark_input: Input configuration for the benchmark _storage: Storage service to use for testing @@ -49,10 +49,10 @@ class NetworkPingPong(Experiment): _deployment_client: Deployment client to use _sebs_client: SeBS client """ - + def __init__(self, config: ExperimentConfig): """Initialize a new NetworkPingPong experiment. - + Args: config: Experiment configuration """ @@ -60,11 +60,11 @@ def __init__(self, config: ExperimentConfig): def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. - + This method sets up the benchmark, function, triggers, storage, and output directory for the experiment. It creates or gets the function and its HTTP trigger, and prepares the input data for the benchmark. 
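The UDP-based measurement implemented by receive_datagrams above boils down to a simple timestamped echo loop. The following is only a generic sketch of that pattern, not the module's implementation; the function name, port handling, and CSV layout are illustrative.

    import socket
    from datetime import datetime

    def echo_datagrams(port: int, repetitions: int, out_path: str) -> None:
        # Bind a UDP socket, echo each datagram sent by the function, and log
        # receive/send timestamps so clock drift and round-trip times can be
        # derived afterwards.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as server, \
                open(out_path, "w") as out:
            server.settimeout(30)
            server.bind(("0.0.0.0", port))
            out.write("id,recv_timestamp,send_timestamp\n")
            for i in range(repetitions):
                data, address = server.recvfrom(1024)
                recv_ts = datetime.now().timestamp()
                server.sendto(data, address)
                send_ts = datetime.now().timestamp()
                out.write(f"{i},{recv_ts},{send_ts}\n")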
- + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use @@ -78,7 +78,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: self.benchmark_input = benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) - + # Get storage for testing storage latency self._storage = deployment_client.system_resources.get_storage(replace_existing=True) @@ -98,8 +98,8 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: def run(self) -> None: """Run the network ping-pong experiment. - - This method executes the experiment, measuring network latency and + + This method executes the experiment, measuring network latency and throughput between the client and the serverless function. It first determines the client's public IP address to include in the results. """ @@ -125,11 +125,11 @@ def run(self) -> None: def process(self, directory: str) -> None: """Process the experiment results. - + This method processes the CSV files generated during the experiment execution, computes round-trip times (RTT), and generates summary statistics and a histogram of the RTT distribution. - + Args: directory: Directory containing the experiment results """ @@ -160,11 +160,11 @@ def process(self, directory: str) -> None: def receive_datagrams(self, repetitions: int, port: int, ip: str) -> None: """Receive UDP datagrams from the function and respond to them. - + This method acts as a UDP server, receiving datagrams from the function and responding to them. It measures the timestamps of packet reception and response, and records them for later analysis. - + Args: repetitions: Number of repetitions to execute port: UDP port to listen on @@ -219,7 +219,7 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str) -> None: @staticmethod def name() -> str: """Get the name of the experiment. - + Returns: The name "network-ping-pong" """ @@ -228,7 +228,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of the experiment. - + Returns: The type name "Experiment.NetworkPingPong" """ diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index d3018547..0e5b7cb0 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -35,15 +35,15 @@ class PerfCost(Experiment): """Performance and cost measurement experiment. - + This experiment measures the performance characteristics and execution costs of serverless functions under different execution conditions. It can measure cold starts, warm execution, burst load, and sequential execution patterns. - + The experiment can be configured to run with different memory sizes, allowing for comparison of performance across different resource allocations. - + Attributes: _benchmark: The benchmark to execute _benchmark_input: The input data for the benchmark @@ -53,10 +53,10 @@ class PerfCost(Experiment): _deployment_client: The deployment client to use _sebs_client: The SeBS client """ - + def __init__(self, config: ExperimentConfig): """Initialize a new PerfCost experiment. - + Args: config: Experiment configuration """ @@ -65,7 +65,7 @@ def __init__(self, config: ExperimentConfig): @staticmethod def name() -> str: """Get the name of the experiment. - + Returns: The name "perf-cost" """ @@ -74,7 +74,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of the experiment. 
- + Returns: The type name "Experiment.PerfCost" """ @@ -82,14 +82,14 @@ def typename() -> str: class RunType(Enum): """Types of experiment runs. - + This enum defines the different types of experiment runs: - WARM: Measure warm execution performance (reused containers) - COLD: Measure cold start performance (new containers) - BURST: Measure performance under concurrent burst load - SEQUENTIAL: Measure performance with sequential invocations """ - + WARM = 0 COLD = 1 BURST = 2 @@ -97,7 +97,7 @@ class RunType(Enum): def str(self) -> str: """Get the string representation of the run type. - + Returns: The lowercase name of the run type """ @@ -105,11 +105,11 @@ def str(self) -> str: def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. - + This method sets up the benchmark, function, trigger, and output directory for the experiment. It creates or gets the function and its HTTP trigger, and prepares the input data for the benchmark. - + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use @@ -143,14 +143,14 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: self._out_dir = os.path.join(sebs_client.output_dir, "perf-cost") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) - + # Save clients for later use self._deployment_client = deployment_client self._sebs_client = sebs_client def run(self) -> None: """Run the experiment. - + This method runs the experiment with the configured settings. If memory sizes are specified, it runs the experiment for each memory size, updating the function configuration accordingly. @@ -161,12 +161,12 @@ def run(self) -> None: # Get memory sizes to test memory_sizes = settings["memory-sizes"] - + # Run with default memory if no specific sizes are provided if len(memory_sizes) == 0: self.logging.info("Begin experiment with default memory configuration") self.run_configuration(settings, settings["repetitions"]) - + # Run for each specified memory size for memory in memory_sizes: self.logging.info(f"Begin experiment on memory size {memory}") @@ -179,19 +179,19 @@ def run(self) -> None: def compute_statistics(self, times: List[float]) -> None: """Compute statistical analysis of execution times. - + This method computes basic statistics (mean, median, standard deviation, coefficient of variation) and confidence intervals for the given times. It computes both parametric (Student's t-distribution) and non-parametric confidence intervals. - + Args: times: List of execution times in milliseconds """ # Compute basic statistics mean, median, std, cv = basic_stats(times) self.logging.info(f"Mean {mean} [ms], median {median} [ms], std {std}, CV {cv}") - + # Compute confidence intervals for different confidence levels for alpha in [0.95, 0.99]: # Parametric confidence interval (Student's t-distribution) @@ -223,12 +223,12 @@ def _run_configuration( suffix: str = "", ) -> None: """Run a specific experiment configuration. - + This method executes the experiment with the specified run type, collecting and recording the results. It handles different run types (cold, warm, burst, sequential) appropriately, enforcing cold starts when needed and collecting execution statistics. 
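The statistics described for compute_statistics above (mean/median/std/CV plus parametric and non-parametric confidence intervals) can be sketched as follows. This is an illustration assuming numpy and scipy, not the code added by this patch; the rank approximation for the median interval is a standard normal-approximation shortcut.

    import numpy as np
    from scipy import stats

    def confidence_intervals(times_ms, alpha=0.95):
        # Parametric CI: Student's t-distribution around the sample mean.
        data = np.asarray(times_ms, dtype=float)
        mean = data.mean()
        t_low, t_high = stats.t.interval(
            alpha, len(data) - 1, loc=mean, scale=stats.sem(data)
        )
        # Non-parametric CI on the median via order statistics
        # (normal approximation for the rank bounds).
        n = len(data)
        z = stats.norm.ppf(1 - (1 - alpha) / 2)
        low_rank = max(int(np.floor((n - z * np.sqrt(n)) / 2)), 0)
        high_rank = min(int(np.ceil((n + z * np.sqrt(n)) / 2)), n - 1)
        ordered = np.sort(data)
        return (t_low, t_high), (ordered[low_rank], ordered[high_rank])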
- + Args: run_type: Type of run (cold, warm, burst, sequential) settings: Experiment settings @@ -347,16 +347,16 @@ def _run_configuration( def run_configuration(self, settings: dict, repetitions: int, suffix: str = "") -> None: """Run experiments for each configured experiment type. - + This method runs the experiment for each experiment type specified in the settings. It dispatches to the appropriate run type handler for each experiment type. - + Args: settings: Experiment settings repetitions: Number of repetitions to run suffix: Optional suffix for output file names (e.g., memory size) - + Raises: RuntimeError: If an unknown experiment type is specified """ @@ -406,12 +406,12 @@ def process( extend_time_interval: int, ) -> None: """Process experiment results and generate a CSV report. - + This method processes the experiment results, downloads additional metrics if needed, and generates a CSV report with the results. The report includes memory usage, execution times, and other metrics for each experiment type and invocation. - + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 0c5981ec..a8cb9c7c 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -22,12 +22,12 @@ class Result: """Experiment result collection and management. - + This class stores and manages the results of experiments, including function invocation results, metrics from cloud providers, and configuration information. It provides methods for adding invocation results, retrieving metrics, and serializing/deserializing results. - + Attributes: config: Dictionary containing experiment and deployment configurations _invocations: Dictionary mapping function names to invocation results @@ -37,7 +37,7 @@ class Result: result_bucket: Optional bucket name for storing results logging_handlers: Logging handlers for the result """ - + def __init__( self, experiment_config: ExperimentConfig, @@ -47,7 +47,7 @@ def __init__( result_bucket: Optional[str] = None, ): """Initialize a new experiment result. - + Args: experiment_config: Experiment configuration deployment_config: Deployment configuration @@ -71,21 +71,21 @@ def __init__( def begin(self) -> None: """Mark the beginning of the experiment. - + This method records the start time of the experiment. """ self.begin_time = datetime.now().timestamp() def end(self) -> None: """Mark the end of the experiment. - + This method records the end time of the experiment. """ self.end_time = datetime.now().timestamp() def times(self) -> Tuple[float, float]: """Get the start and end times of the experiment. - + Returns: Tuple of (start_time, end_time) as Unix timestamps """ @@ -93,7 +93,7 @@ def times(self) -> Tuple[float, float]: def add_result_bucket(self, result_bucket: str) -> None: """Set the result bucket for storing experiment results. - + Args: result_bucket: Name of the bucket to store results in """ @@ -101,10 +101,10 @@ def add_result_bucket(self, result_bucket: str) -> None: def add_invocation(self, func: Function, invocation: ExecutionResult) -> None: """Add an invocation result for a specific function. - + If the invocation doesn't have a request ID (likely due to failure), a synthetic ID is generated. 
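The synthetic-ID bookkeeping described above amounts to the following kind of fallback. This is a minimal sketch under assumed names (the dictionary layout and helper name are hypothetical), not the class's actual method body.

    import uuid

    def record_invocation(invocations: dict, function_name: str, result) -> None:
        # Fall back to a synthetic ID when the platform returned no request ID,
        # which typically happens for failed invocations.
        request_id = getattr(result, "request_id", None) or f"failed-{uuid.uuid4().hex[:8]}"
        invocations.setdefault(function_name, {})[request_id] = result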
- + Args: func: Function the invocation belongs to invocation: Execution result to add @@ -123,7 +123,7 @@ def add_invocation(self, func: Function, invocation: ExecutionResult) -> None: def functions(self) -> List[str]: """Get a list of all function names in the results. - + Returns: List of function names """ @@ -131,13 +131,13 @@ def functions(self) -> List[str]: def invocations(self, func: str) -> Dict[str, ExecutionResult]: """Get invocation results for a specific function. - + Args: func: Name of the function to get invocation results for - + Returns: Dictionary mapping request IDs to execution results - + Raises: KeyError: If function name is not found in results """ @@ -145,13 +145,13 @@ def invocations(self, func: str) -> Dict[str, ExecutionResult]: def metrics(self, func: str) -> dict: """Get metrics for a specific function. - + If no metrics exist for the function, an empty dictionary is created and returned. - + Args: func: Name of the function to get metrics for - + Returns: Dictionary of metrics for the function """ @@ -162,15 +162,15 @@ def metrics(self, func: str) -> dict: @staticmethod def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": """Deserialize a result from a dictionary representation. - + This static method creates a new Result object from a dictionary representation, which may have been loaded from a file or cache. - + Args: cached_config: Dictionary representation of the result cache: Cache instance for resolving references handlers: Logging handlers for the result - + Returns: A new Result object with settings from the dictionary """ diff --git a/sebs/experiments/startup_time.py b/sebs/experiments/startup_time.py index 203c7ae5..dd7ccef5 100644 --- a/sebs/experiments/startup_time.py +++ b/sebs/experiments/startup_time.py @@ -25,23 +25,24 @@ class StartupTime(Experiment): """Startup time measurement experiment. - + This experiment measures the startup and initialization time of serverless functions, focusing on cold start performance. It isolates the time spent in container initialization, runtime startup, and function loading. - + The experiment can be used to compare startup times across different: - Programming languages and runtimes - Memory configurations - Code package sizes - Platform configurations - + Attributes: config: Experiment configuration settings """ + def __init__(self, config: ExperimentConfig) -> None: """Initialize a new StartupTime experiment. - + Args: config: Experiment configuration """ @@ -50,7 +51,7 @@ def __init__(self, config: ExperimentConfig) -> None: @staticmethod def name() -> str: """Get the name of the experiment. - + Returns: The name "startup-time" """ @@ -59,34 +60,34 @@ def name() -> str: @staticmethod def typename() -> str: """Get the type name of the experiment. - + Returns: The type name "Experiment.StartupTime" """ return "Experiment.StartupTime" - + def prepare(self, sebs_client: "SeBS", deployment_client: "FaaSSystem") -> None: """Prepare the experiment for execution. - + This method sets up the experiment by preparing the benchmark function and configuring the necessary resources for measuring startup time. - + Args: sebs_client: The SeBS client to use deployment_client: The deployment client to use - + Note: This experiment is currently a placeholder and needs implementation. """ # TODO: Implement startup time experiment preparation pass - + def run(self) -> None: """Execute the startup time experiment. 
- + This method runs the experiment to measure function startup times, enforcing cold starts and measuring initialization overhead. - + Note: This experiment is currently a placeholder and needs implementation. """ diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 91683a38..5cb53330 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -30,11 +30,11 @@ class ExecutionTimes: """ Client-side timing measurements for function execution. - + Stores various timing measurements from the client's perspective, including total execution time, HTTP connection times, and benchmark runtime. - + Attributes: client: Total client-side execution time in microseconds client_begin: Timestamp when the request was initiated @@ -63,10 +63,10 @@ def __init__(self): def deserialize(cached_obj: dict) -> "ExecutionTimes": """ Create an ExecutionTimes instance from a dictionary. - + Args: cached_obj: Dictionary containing serialized timing data - + Returns: ExecutionTimes: New instance with the deserialized data """ @@ -78,10 +78,10 @@ def deserialize(cached_obj: dict) -> "ExecutionTimes": class ProviderTimes: """ Provider-reported timing measurements for function execution. - + Stores timing measurements reported by the cloud provider, including initialization time and execution time. - + Attributes: initialization: Function initialization time in microseconds execution: Function execution time in microseconds @@ -99,10 +99,10 @@ def __init__(self): def deserialize(cached_obj: dict) -> "ProviderTimes": """ Create a ProviderTimes instance from a dictionary. - + Args: cached_obj: Dictionary containing serialized timing data - + Returns: ProviderTimes: New instance with the deserialized data """ @@ -114,10 +114,10 @@ def deserialize(cached_obj: dict) -> "ProviderTimes": class ExecutionStats: """ Statistics for function execution. - + Tracks execution statistics such as memory usage, cold start status, and execution failure. - + Attributes: memory_used: Amount of memory used in MB (if available) cold_start: Whether this was a cold start execution @@ -138,10 +138,10 @@ def __init__(self): def deserialize(cached_obj: dict) -> "ExecutionStats": """ Create an ExecutionStats instance from a dictionary. - + Args: cached_obj: Dictionary containing serialized statistics - + Returns: ExecutionStats: New instance with the deserialized data """ @@ -153,10 +153,10 @@ def deserialize(cached_obj: dict) -> "ExecutionStats": class ExecutionBilling: """ Billing information for function execution. - + Tracks billing-related metrics such as allocated memory, billed execution time, and GB-seconds consumed. - + Attributes: memory: Allocated memory in MB billed_time: Billed execution time in milliseconds @@ -177,7 +177,7 @@ def __init__(self): def memory(self) -> Optional[int]: """ Get the allocated memory in MB. - + Returns: int: Memory allocation in MB, or None if not available """ @@ -187,7 +187,7 @@ def memory(self) -> Optional[int]: def memory(self, val: int): """ Set the allocated memory in MB. - + Args: val: Memory allocation in MB """ @@ -197,7 +197,7 @@ def memory(self, val: int): def billed_time(self) -> Optional[int]: """ Get the billed execution time in milliseconds. - + Returns: int: Billed time in milliseconds, or None if not available """ @@ -207,7 +207,7 @@ def billed_time(self) -> Optional[int]: def billed_time(self, val: int): """ Set the billed execution time in milliseconds. 
- + Args: val: Billed time in milliseconds """ @@ -217,7 +217,7 @@ def billed_time(self, val: int): def gb_seconds(self) -> int: """ Get the GB-seconds consumed. - + Returns: int: GB-seconds consumed """ @@ -227,7 +227,7 @@ def gb_seconds(self) -> int: def gb_seconds(self, val: int): """ Set the GB-seconds consumed. - + Args: val: GB-seconds consumed """ @@ -237,10 +237,10 @@ def gb_seconds(self, val: int): def deserialize(cached_obj: dict) -> "ExecutionBilling": """ Create an ExecutionBilling instance from a dictionary. - + Args: cached_obj: Dictionary containing serialized billing data - + Returns: ExecutionBilling: New instance with the deserialized data """ @@ -252,11 +252,11 @@ def deserialize(cached_obj: dict) -> "ExecutionBilling": class ExecutionResult: """ Comprehensive result of a function execution. - + This class captures all timing information, provider metrics, and function output from a single function invocation. It provides methods for parsing benchmark output and calculating metrics. - + Attributes: output: Dictionary containing function output request_id: Unique identifier for the request @@ -286,11 +286,11 @@ def __init__(self): def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": """ Create an ExecutionResult with client-side timing information. - + Args: client_time_begin: Timestamp when the request was initiated client_time_end: Timestamp when the response was received - + Returns: ExecutionResult: New instance with calculated client-side timing """ @@ -303,12 +303,12 @@ def from_times(client_time_begin: datetime, client_time_end: datetime) -> "Execu def parse_benchmark_output(self, output: dict): """ Parse the output from a benchmark execution. - + Extracts timing information and cold start status from the benchmark output. - + Args: output: Dictionary containing benchmark output - + Raises: RuntimeError: If the invocation failed (missing required fields) """ @@ -329,10 +329,10 @@ def parse_benchmark_output(self, output: dict): def deserialize(cached_config: dict) -> "ExecutionResult": """ Create an ExecutionResult instance from a cached configuration. - + Args: cached_config: Dictionary containing serialized execution result - + Returns: ExecutionResult: New instance with the deserialized data """ @@ -349,20 +349,22 @@ def deserialize(cached_config: dict) -> "ExecutionResult": class Trigger(ABC, LoggingBase): """ Abstract base class for function triggers. - + A trigger represents a mechanism for invoking a serverless function, such as HTTP requests, direct SDK invocations, or event-based triggers. Each trigger type implements synchronous and asynchronous invocation methods. """ + class TriggerType(Enum): """ Enumeration of supported trigger types. - + Defines the different mechanisms for invoking serverless functions: - HTTP: Invocation via HTTP requests - LIBRARY: Invocation via cloud provider SDK - STORAGE: Invocation via storage events """ + HTTP = "http" LIBRARY = "library" STORAGE = "storage" @@ -371,13 +373,13 @@ class TriggerType(Enum): def get(name: str) -> "Trigger.TriggerType": """ Get a TriggerType by name (case-insensitive). - + Args: name: Name of the trigger type - + Returns: TriggerType: The matching trigger type - + Raises: Exception: If no matching trigger type is found """ @@ -389,18 +391,18 @@ def get(name: str) -> "Trigger.TriggerType": def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: """ Invoke a function via HTTP request. 
- + Makes a HTTP POST request to the given URL with the provided payload and processes the response into an ExecutionResult. - + Args: payload: Dictionary containing the function input url: URL to invoke the function verify_ssl: Whether to verify SSL certificates - + Returns: ExecutionResult: Result of the function execution - + Raises: RuntimeError: If the invocation fails or produces invalid output """ @@ -457,7 +459,7 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec def trigger_type() -> "Trigger.TriggerType": """ Get the type of this trigger. - + Returns: TriggerType: The type of this trigger """ @@ -467,10 +469,10 @@ def trigger_type() -> "Trigger.TriggerType": def sync_invoke(self, payload: dict) -> ExecutionResult: """ Synchronously invoke a function with the given payload. - + Args: payload: Dictionary containing the function input - + Returns: ExecutionResult: Result of the function execution """ @@ -480,10 +482,10 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict) -> concurrent.futures.Future: """ Asynchronously invoke a function with the given payload. - + Args: payload: Dictionary containing the function input - + Returns: Future: Future object representing the pending execution """ @@ -493,7 +495,7 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: """ Serialize the trigger to a dictionary. - + Returns: dict: Dictionary representation of the trigger """ @@ -504,10 +506,10 @@ def serialize(self) -> dict: def deserialize(cached_config: dict) -> "Trigger": """ Create a Trigger instance from a cached configuration. - + Args: cached_config: Dictionary containing serialized trigger - + Returns: Trigger: New instance with the deserialized data """ @@ -517,9 +519,10 @@ def deserialize(cached_config: dict) -> "Trigger": class Language(Enum): """ Enumeration of supported programming languages. - + Currently supports Python and Node.js for serverless functions. """ + PYTHON = "python" NODEJS = "nodejs" @@ -527,13 +530,13 @@ class Language(Enum): def deserialize(val: str) -> Language: """ Get a Language by string value. - + Args: val: String representation of the language - + Returns: Language: The matching language enum - + Raises: Exception: If no matching language is found """ @@ -546,16 +549,17 @@ def deserialize(val: str) -> Language: class Architecture(Enum): """ Enumeration of supported CPU architectures. - + Defines the CPU architectures that can be targeted for function deployment. """ + X86 = "x64" ARM = "arm64" def serialize(self) -> str: """ Serialize the architecture to a string. - + Returns: str: String representation of the architecture """ @@ -565,13 +569,13 @@ def serialize(self) -> str: def deserialize(val: str) -> Architecture: """ Get an Architecture by string value. - + Args: val: String representation of the architecture - + Returns: Architecture: The matching architecture enum - + Raises: Exception: If no matching architecture is found """ @@ -585,9 +589,9 @@ def deserialize(val: str) -> Architecture: class Runtime: """ Runtime configuration for a serverless function. - + Defines the language and version for a function's runtime environment. - + Attributes: language: Programming language (Python, Node.js) version: Version string of the language runtime @@ -599,7 +603,7 @@ class Runtime: def serialize(self) -> dict: """ Serialize the runtime to a dictionary. 
- + Returns: dict: Dictionary representation of the runtime """ @@ -609,10 +613,10 @@ def serialize(self) -> dict: def deserialize(config: dict) -> Runtime: """ Create a Runtime instance from a dictionary. - + Args: config: Dictionary containing serialized runtime - + Returns: Runtime: New instance with the deserialized data """ @@ -627,15 +631,16 @@ def deserialize(config: dict) -> Runtime: class FunctionConfig: """ Configuration for a serverless function. - + Defines the resources, runtime, and architecture for a function deployment. - + Attributes: timeout: Maximum execution time in seconds memory: Memory allocation in MB runtime: Runtime environment configuration architecture: CPU architecture for deployment """ + timeout: int memory: int runtime: Runtime @@ -645,11 +650,11 @@ class FunctionConfig: def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: """ Create a FunctionConfig subclass instance from a benchmark. - + Args: benchmark: Benchmark to extract configuration from obj_type: Type of FunctionConfig to create - + Returns: T: New instance of the specified FunctionConfig subclass """ @@ -667,10 +672,10 @@ def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: def from_benchmark(benchmark: Benchmark) -> FunctionConfig: """ Create a FunctionConfig instance from a benchmark. - + Args: benchmark: Benchmark to extract configuration from - + Returns: FunctionConfig: New instance with the benchmark's configuration """ @@ -680,10 +685,10 @@ def from_benchmark(benchmark: Benchmark) -> FunctionConfig: def deserialize(data: dict) -> FunctionConfig: """ Create a FunctionConfig instance from a dictionary. - + Args: data: Dictionary containing serialized function configuration - + Returns: FunctionConfig: New instance with the deserialized data """ @@ -695,7 +700,7 @@ def deserialize(data: dict) -> FunctionConfig: def serialize(self) -> dict: """ Serialize the function configuration to a dictionary. - + Returns: dict: Dictionary representation of the function configuration """ @@ -705,14 +710,14 @@ def serialize(self) -> dict: class Function(LoggingBase): """ Abstract base class for serverless functions. - + This class represents a deployed serverless function with its configuration and associated triggers. It provides a unified interface for managing function deployments across different cloud providers. - + Each cloud provider (AWS, Azure, GCP, etc.) implements a subclass with platform-specific functionality. - + Attributes: config: Function configuration name: Name of the deployed function @@ -720,11 +725,11 @@ class Function(LoggingBase): code_package_hash: Hash of the deployed code package updated_code: Whether the code has been updated since deployment """ - + def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): """ Initialize a Function instance. - + Args: benchmark: Name of the benchmark name: Name of the function @@ -743,7 +748,7 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi def config(self) -> FunctionConfig: """ Get the function configuration. - + Returns: FunctionConfig: Configuration of the function """ @@ -753,7 +758,7 @@ def config(self) -> FunctionConfig: def name(self) -> str: """ Get the name of the function. - + Returns: str: Name of the function """ @@ -763,7 +768,7 @@ def name(self) -> str: def benchmark(self) -> str: """ Get the name of the benchmark. 
- + Returns: str: Name of the benchmark """ @@ -773,7 +778,7 @@ def benchmark(self) -> str: def code_package_hash(self) -> str: """ Get the hash of the code package. - + Returns: str: Hash of the code package """ @@ -783,7 +788,7 @@ def code_package_hash(self) -> str: def code_package_hash(self, new_hash: str): """ Set the hash of the code package. - + Args: new_hash: New hash of the code package """ @@ -793,7 +798,7 @@ def code_package_hash(self, new_hash: str): def updated_code(self) -> bool: """ Check if the code has been updated since deployment. - + Returns: bool: True if the code has been updated, False otherwise """ @@ -803,7 +808,7 @@ def updated_code(self) -> bool: def updated_code(self, val: bool): """ Set whether the code has been updated since deployment. - + Args: val: True if the code has been updated, False otherwise """ @@ -812,7 +817,7 @@ def updated_code(self, val: bool): def triggers_all(self) -> List[Trigger]: """ Get all triggers associated with this function. - + Returns: List[Trigger]: List of all triggers """ @@ -821,10 +826,10 @@ def triggers_all(self) -> List[Trigger]: def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: """ Get triggers of a specific type associated with this function. - + Args: trigger_type: Type of triggers to get - + Returns: List[Trigger]: List of triggers of the specified type """ @@ -836,7 +841,7 @@ def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: def add_trigger(self, trigger: Trigger): """ Add a trigger to this function. - + Args: trigger: Trigger to add """ @@ -848,7 +853,7 @@ def add_trigger(self, trigger: Trigger): def serialize(self) -> dict: """ Serialize the function to a dictionary. - + Returns: dict: Dictionary representation of the function """ @@ -867,10 +872,10 @@ def serialize(self) -> dict: def deserialize(cached_config: dict) -> "Function": """ Create a Function instance from a cached configuration. - + Args: cached_config: Dictionary containing serialized function - + Returns: Function: New instance with the deserialized data """ diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 2be96fbf..045ffeef 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -19,23 +19,23 @@ class NoSQLStorage(ABC, LoggingBase): """ Abstract base class for NoSQL database storage implementations. - + This class defines the interface for NoSQL database operations across different cloud platforms and local environments. Concrete implementations handle the platform-specific details of creating tables, writing data, and managing resources. - + Attributes: cache_client: Client for caching database information region: Cloud region where the database is deployed """ - + @staticmethod @abstractmethod def deployment_name() -> str: """ Get the name of the deployment platform. - + Returns: str: Name of the deployment platform (e.g., 'aws', 'azure', 'gcp') """ @@ -45,7 +45,7 @@ def deployment_name() -> str: def cache_client(self) -> Cache: """ Get the cache client. - + Returns: Cache: The cache client for database information """ @@ -55,7 +55,7 @@ def cache_client(self) -> Cache: def region(self) -> str: """ Get the cloud region. - + Returns: str: The cloud region where the database is deployed """ @@ -64,7 +64,7 @@ def region(self) -> str: def __init__(self, region: str, cache_client: Cache, resources: Resources): """ Initialize a NoSQL storage instance. 
- + Args: region: Cloud region where the database is deployed cache_client: Client for caching database information @@ -80,10 +80,10 @@ def __init__(self, region: str, cache_client: Cache, resources: Resources): def get_tables(self, benchmark: str) -> Dict[str, str]: """ Get all tables associated with a benchmark. - + Args: benchmark: Name of the benchmark - + Returns: Dict[str, str]: Dictionary mapping table logical names to physical table names """ @@ -93,11 +93,11 @@ def get_tables(self, benchmark: str) -> Dict[str, str]: def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: """ Get the physical table name for a benchmark's logical table. - + Args: benchmark: Name of the benchmark table: Logical name of the table - + Returns: Optional[str]: Physical table name if it exists, None otherwise """ @@ -107,10 +107,10 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: def retrieve_cache(self, benchmark: str) -> bool: """ Retrieve cached table information for a benchmark. - + Args: benchmark: Name of the benchmark - + Returns: bool: True if cache was successfully retrieved, False otherwise """ @@ -120,7 +120,7 @@ def retrieve_cache(self, benchmark: str) -> bool: def update_cache(self, benchmark: str): """ Update the cache with the latest table information for a benchmark. - + Args: benchmark: Name of the benchmark """ @@ -129,7 +129,7 @@ def update_cache(self, benchmark: str): def envs(self) -> dict: """ Get environment variables required for connecting to the NoSQL storage. - + Returns: dict: Dictionary of environment variables """ @@ -153,10 +153,10 @@ def create_benchmark_tables( ): """ Create a table for a benchmark if it doesn't exist in the cache. - + Checks if the table already exists in the cache. If not, creates a new table with the specified keys. - + Args: benchmark: Name of the benchmark name: Logical name of the table @@ -188,13 +188,13 @@ def create_table( ) -> str: """ Create a new table for a benchmark. - + Args: benchmark: Name of the benchmark name: Logical name of the table primary_key: Primary key field name secondary_key: Optional secondary key field name - + Returns: str: Physical name of the created table """ @@ -211,7 +211,7 @@ def write_to_table( ): """ Write data to a table. - + Args: benchmark: Name of the benchmark table: Logical name of the table @@ -233,10 +233,10 @@ def write_to_table( def clear_table(self, name: str) -> str: """ Clear all data from a table. - + Args: name: Name of the table to clear - + Returns: str: Result message or status """ @@ -246,10 +246,10 @@ def clear_table(self, name: str) -> str: def remove_table(self, name: str) -> str: """ Remove a table completely. 
- + Args: name: Name of the table to remove - + Returns: str: Result message or status """ diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index ebc421eb..43165baa 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -124,7 +124,7 @@ def exists_bucket(self, bucket_name: str) -> bool: pass @abstractmethod - def clean_bucket(self, bucket_name: str): + def clean_bucket(self, bucket_name: str) -> None: pass @abstractmethod @@ -163,20 +163,20 @@ def benchmark_data( if cached_storage is not None: - cached_storage = cached_storage["buckets"] + cached_buckets = cached_storage["buckets"] # verify the input is up to date for prefix in self.input_prefixes: - if prefix not in cached_storage["input"]: + if prefix not in cached_buckets["input"]: self.cached = False for prefix in self.output_prefixes: - if prefix not in cached_storage["output"]: + if prefix not in cached_buckets["output"]: self.cached = False else: self.cached = False - if self.cached is True and cached_storage["input_uploaded"] is False: + if cached_storage is not None and cached_storage["input_uploaded"] is False: self.cached = False # query buckets if the input prefixes changed, or the input is not up to date. diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 270f7433..8db54209 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -29,12 +29,12 @@ class System(ABC, LoggingBase): """ Abstract base class for FaaS system implementations. - + This class provides basic abstractions for all supported FaaS platforms. It defines the interface for system initialization, resource management, function deployment, code packaging, function invocation, and metrics collection. Each cloud provider implements a concrete subclass of this abstract base. - + The class handles: - System and storage service initialization - Creation and updating of serverless functions @@ -43,7 +43,7 @@ class System(ABC, LoggingBase): - Metrics collection and error handling - Caching of functions to avoid redundant deployments - Cold start management - + Attributes: system_config: Global SeBS configuration docker_client: Docker client for building code packages and containers @@ -51,6 +51,7 @@ class System(ABC, LoggingBase): cold_start_counter: Counter for generating unique function names to force cold starts system_resources: Resources manager for the specific cloud platform """ + def __init__( self, system_config: SeBSConfig, @@ -60,7 +61,7 @@ def __init__( ): """ Initialize a FaaS system implementation. - + Args: system_config: Global SeBS configuration settings cache_client: Cache client for storing function and deployment information @@ -79,7 +80,7 @@ def __init__( def system_config(self) -> SeBSConfig: """ Get the global SeBS configuration. - + Returns: SeBSConfig: The system configuration """ @@ -89,7 +90,7 @@ def system_config(self) -> SeBSConfig: def docker_client(self) -> docker.client: """ Get the Docker client. - + Returns: docker.client: The Docker client """ @@ -99,7 +100,7 @@ def docker_client(self) -> docker.client: def cache_client(self) -> Cache: """ Get the cache client. - + Returns: Cache: The cache client """ @@ -109,10 +110,10 @@ def cache_client(self) -> Cache: def cold_start_counter(self) -> int: """ Get the cold start counter. - + This counter is used in function name generation to help force cold starts by creating new function instances with different names. 
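The role of the counter can be illustrated with a short sketch: embedding it in the generated function name yields a name the platform has never seen, so the next invocation cannot reuse a warm container. The naming scheme below is illustrative only, not the platform-specific logic.

    def unique_function_name(benchmark: str, language: str, counter: int) -> str:
        # Bumping the counter produces a fresh name and therefore a cold start
        # on the next deployment/invocation.
        return f"{benchmark}-{language}-{counter}".replace(".", "-")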
- + Returns: int: The current cold start counter value """ @@ -122,7 +123,7 @@ def cold_start_counter(self) -> int: def cold_start_counter(self, val: int): """ Set the cold start counter. - + Args: val: The new counter value """ @@ -133,7 +134,7 @@ def cold_start_counter(self, val: int): def config(self) -> Config: """ Get the platform-specific configuration. - + Returns: Config: The platform-specific configuration """ @@ -143,7 +144,7 @@ def config(self) -> Config: def system_resources(self) -> SystemResources: """ Get the platform-specific resources manager. - + Returns: SystemResources: The resources manager """ @@ -154,7 +155,7 @@ def system_resources(self) -> SystemResources: def function_type() -> "Type[Function]": """ Get the platform-specific Function class type. - + Returns: Type[Function]: The Function class for this platform """ @@ -163,11 +164,11 @@ def function_type() -> "Type[Function]": def find_deployments(self) -> List[str]: """ Find existing deployments in the cloud platform. - + Default implementation uses storage buckets to identify deployments. This can be overridden by platform-specific implementations, e.g., Azure that looks for unique storage accounts. - + Returns: List[str]: List of existing deployment resource IDs """ @@ -176,12 +177,12 @@ def find_deployments(self) -> List[str]: def initialize_resources(self, select_prefix: Optional[str]): """ Initialize cloud resources for the deployment. - + This method either: 1. Uses an existing resource ID from configuration 2. Finds and reuses an existing deployment matching the prefix 3. Creates a new unique resource ID and initializes resources - + Args: select_prefix: Optional prefix to match when looking for existing deployments """ @@ -223,17 +224,17 @@ def initialize_resources(self, select_prefix: Optional[str]): res_id = str(uuid.uuid1())[0:8] self.config.resources.resources_id = res_id self.logging.info(f"Generating unique resource name {res_id}") - + # Ensure that the bucket is created - this allocates the new resource self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): """ Initialize the system. - + After this call completes, the local or remote FaaS system should be ready to allocate functions, manage storage resources, and invoke functions. - + Args: config: System-specific parameters resource_prefix: Optional prefix for resource naming @@ -253,7 +254,7 @@ def package_code( ) -> Tuple[str, int, str]: """ Apply system-specific code packaging to prepare a deployment package. - + The benchmark creates a code directory with the following structure: - [benchmark sources] - [benchmark resources] @@ -262,7 +263,7 @@ def package_code( This step transforms that structure to fit platform-specific deployment requirements, such as creating a zip file for AWS or container image. - + Args: directory: Path to the code directory language_name: Programming language name @@ -312,11 +313,11 @@ def create_function( def cached_function(self, function: Function): """ Perform any necessary operations for a cached function. - + This method is called when a function is found in the cache. It may perform platform-specific operations such as checking if the function still exists in the cloud, updating permissions, etc. 
- + Args: function: The cached function instance """ @@ -347,22 +348,22 @@ def update_function( def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: """ Get or create a function for a benchmark. - + This method handles the complete function creation/update workflow: - + 1. If a cached function with the given name exists and code has not changed, returns the existing function 2. If a cached function exists but the code has changed, updates the function with the new code 3. If no cached function exists, creates a new function - + Args: code_package: The benchmark containing the function code func_name: Optional name for the function (will be generated if not provided) - + Returns: Function: The function instance - + Raises: Exception: If the language version is not supported by this platform """ @@ -381,14 +382,14 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) # Generate function name if not provided if not func_name: func_name = self.default_function_name(code_package) - + # Build the code package rebuilt, _, container_deployment, container_uri = code_package.build(self.package_code) # Check if function exists in cache functions = code_package.functions is_function_cached = not (not functions or func_name not in functions) - + if is_function_cached: # Retrieve function from cache cached_function = functions[func_name] @@ -429,7 +430,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) self.logging.info( "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) ) - + # Check if code needs to be updated if function.code_package_hash != code_package.hash or rebuilt: if function.code_package_hash != code_package.hash: @@ -444,7 +445,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) f"Enforcing rebuild and update of cached function " f"{func_name} with hash {function.code_package_hash}." ) - + # Update function code self.update_function(function, code_package, container_deployment, container_uri) function.code_package_hash = code_package.hash @@ -456,7 +457,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) function=function, ) code_package.query_cache() - + # Check if configuration needs to be updated elif self.is_configuration_changed(function, code_package): self.update_function_configuration(function, code_package) @@ -464,17 +465,17 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) code_package.query_cache() else: self.logging.info(f"Cached function {func_name} is up to date.") - + return function @abstractmethod def update_function_configuration(self, cached_function: Function, benchmark: Benchmark): """ Update the configuration of an existing function. - + This method is called when a function's code is up-to-date but its configuration (memory, timeout, etc.) needs to be updated. - + Args: cached_function: The function to update benchmark: The benchmark containing the new configuration @@ -484,19 +485,19 @@ def update_function_configuration(self, cached_function: Function, benchmark: Be def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: """ Check if a function's configuration needs to be updated. - + This function checks for common function parameters to verify if their values are still up to date with the benchmark configuration. 
- + Args: cached_function: The existing function benchmark: The benchmark with potential new configuration - + Returns: bool: True if configuration has changed, False otherwise """ changed = False - + # Check common configuration attributes for attr in ["timeout", "memory"]: new_val = getattr(benchmark.benchmark_config, attr) @@ -531,11 +532,11 @@ def default_function_name( ) -> str: """ Generate a default function name for a benchmark. - + Args: code_package: The benchmark to generate a name for resources: Optional resources configuration - + Returns: str: Generated function name """ @@ -545,10 +546,10 @@ def default_function_name( def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): """ Force cold starts for the specified functions. - + This method implements platform-specific techniques to ensure that subsequent invocations of the functions will be cold starts. - + Args: functions: List of functions to enforce cold starts for code_package: The benchmark associated with the functions @@ -566,7 +567,7 @@ def download_metrics( ): """ Download function metrics from the cloud platform. - + Args: function_name: Name of the function to get metrics for start_time: Start timestamp for metrics collection @@ -580,11 +581,11 @@ def download_metrics( def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: """ Create a trigger for a function. - + Args: function: The function to create a trigger for trigger_type: Type of trigger to create - + Returns: Trigger: The created trigger """ @@ -593,7 +594,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def disable_rich_output(self): """ Disable rich output for platforms that support it. - + This is mostly used in testing environments or CI pipelines. """ pass @@ -602,7 +603,7 @@ def disable_rich_output(self): def shutdown(self) -> None: """ Shutdown the FaaS system. - + Closes connections, stops local instances, and updates the cache. This should be called when the system is no longer needed. """ @@ -617,7 +618,7 @@ def shutdown(self) -> None: def name() -> str: """ Get the name of the platform. - + Returns: str: Platform name (e.g., 'aws', 'azure', 'gcp') """ diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index c39311aa..206f8d02 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -29,18 +29,19 @@ class GCloudCLI(LoggingBase): """Docker-based Google Cloud CLI interface. - + Provides a containerized environment for executing gcloud commands with proper authentication and project configuration. Uses a Docker container with the gcloud CLI pre-installed and configured. - + Attributes: docker_instance: Running Docker container with gcloud CLI """ + @staticmethod def typename() -> str: """Get the type name for this CLI implementation. - + Returns: Type name string for GCP CLI """ @@ -50,15 +51,15 @@ def __init__( self, credentials: GCPCredentials, system_config: SeBSConfig, docker_client: docker.client ) -> None: """Initialize the gcloud CLI Docker container. - + Sets up a Docker container with the gcloud CLI, pulling the image if needed and mounting the GCP credentials file for authentication. - + Args: credentials: GCP credentials with service account file path system_config: SeBS system configuration docker_client: Docker client for container management - + Raises: RuntimeError: If Docker image pull fails """ @@ -104,13 +105,13 @@ def __init__( def execute(self, cmd: str) -> bytes: """Execute a command in the gcloud CLI container. 
- + Args: cmd: Command string to execute in the container - + Returns: Command output as bytes - + Raises: RuntimeError: If the command fails (non-zero exit code) """ @@ -125,14 +126,14 @@ def execute(self, cmd: str) -> bytes: def login(self, project_name: str) -> None: """Authenticate gcloud CLI and set the active project. - + Performs service account authentication using the mounted credentials file and sets the specified project as the active project. Automatically confirms any prompts that may appear during project setup. - + Args: project_name: GCP project ID to set as active - + Note: Uses service account authentication instead of browser-based auth. May show warnings about Cloud Resource Manager API permissions. @@ -143,7 +144,7 @@ def login(self, project_name: str) -> None: def shutdown(self) -> None: """Shutdown the gcloud CLI Docker container. - + Stops and removes the Docker container used for gcloud operations. """ self.logging.info("Stopping gcloud CLI manage Docker instance") diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 4dfff710..84b04c9b 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -37,28 +37,28 @@ class GCPCredentials(Credentials): """Credentials manager for Google Cloud Platform authentication. - + Handles authentication to GCP services using service account JSON files. Automatically extracts project ID from credentials and manages environment variable setup for Google Cloud SDK authentication. - + The class supports multiple credential sources in priority order: 1. User-provided credentials file path 2. Cached credentials from previous sessions 3. GOOGLE_APPLICATION_CREDENTIALS environment variable 4. GCP_SECRET_APPLICATION_CREDENTIALS environment variable - + Attributes: _gcp_credentials: Path to the service account JSON file _project_id: GCP project ID extracted from credentials """ - + def __init__(self, gcp_credentials: str) -> None: """Initialize GCP credentials with service account file. - + Args: gcp_credentials: Path to the GCP service account JSON file - + Raises: FileNotFoundError: If the credentials file doesn't exist json.JSONDecodeError: If the credentials file is not valid JSON @@ -74,7 +74,7 @@ def __init__(self, gcp_credentials: str) -> None: @property def gcp_credentials(self) -> str: """Get the path to the GCP service account credentials file. - + Returns: Path to the service account JSON file """ @@ -83,7 +83,7 @@ def gcp_credentials(self) -> str: @property def project_name(self) -> str: """Get the GCP project ID from the credentials. - + Returns: The GCP project ID string """ @@ -92,10 +92,10 @@ def project_name(self) -> str: @staticmethod def initialize(gcp_credentials: str) -> "GCPCredentials": """Create a new GCPCredentials instance. - + Args: gcp_credentials: Path to the GCP service account JSON file - + Returns: A new GCPCredentials instance """ @@ -104,21 +104,21 @@ def initialize(gcp_credentials: str) -> "GCPCredentials": @staticmethod def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: """Deserialize GCP credentials from configuration and cache. - + Loads credentials from multiple sources in priority order: 1. User-provided config with credentials-json path 2. Cached credentials from previous sessions 3. GOOGLE_APPLICATION_CREDENTIALS environment variable 4. 
GCP_SECRET_APPLICATION_CREDENTIALS environment variable - + Args: config: Configuration dictionary potentially containing credentials cache: Cache instance for storing/retrieving credentials handlers: Logging handlers for error reporting - + Returns: Initialized GCPCredentials instance - + Raises: RuntimeError: If no valid credentials are found or if project ID mismatch occurs between cache and new credentials @@ -165,7 +165,7 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Creden def serialize(self) -> Dict: """Serialize credentials to dictionary for cache storage. - + Returns: Dictionary containing project_id for cache storage """ @@ -174,7 +174,7 @@ def serialize(self) -> Dict: def update_cache(self, cache: Cache) -> None: """Update the cache with current credential information. - + Args: cache: Cache instance to update with project ID """ @@ -183,18 +183,19 @@ def update_cache(self, cache: Cache) -> None: class GCPResources(Resources): """Resource manager for Google Cloud Platform serverless resources. - + Manages cloud resources allocated for function execution and deployment, such as IAM roles, API gateways for HTTP triggers, and other GCP-specific infrastructure components. Storage resources are handled separately. - + This class extends the base Resources class with GCP-specific resource management capabilities and handles serialization/deserialization for cache persistence. - + Attributes: Inherits all attributes from the base Resources class """ + def __init__(self) -> None: """Initialize GCP resources manager.""" super().__init__(name="gcp") @@ -202,11 +203,11 @@ def __init__(self) -> None: @staticmethod def initialize(res: Resources, dct: Dict) -> "GCPResources": """Initialize GCP resources from a dictionary configuration. - + Args: res: Base Resources instance to initialize dct: Dictionary containing resource configuration - + Returns: Initialized GCPResources instance """ @@ -216,7 +217,7 @@ def initialize(res: Resources, dct: Dict) -> "GCPResources": def serialize(self) -> Dict: """Serialize resources to dictionary for cache storage. - + Returns: Dictionary representation of resources for cache storage """ @@ -225,15 +226,15 @@ def serialize(self) -> Dict: @staticmethod def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": """Deserialize GCP resources from configuration and cache. - + Loads resources from cache if available, otherwise initializes from user configuration or creates empty resource set. - + Args: config: Configuration dictionary potentially containing resources cache: Cache instance for storing/retrieving resources handlers: Logging handlers for status reporting - + Returns: Initialized GCPResources instance """ @@ -259,7 +260,7 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Resou def update_cache(self, cache: Cache) -> None: """Update the cache with current resource information. - + Args: cache: Cache instance to update with resource data """ @@ -268,14 +269,14 @@ def update_cache(self, cache: Cache) -> None: class GCPConfig(Config): """Main configuration class for Google Cloud Platform deployment. - + Combines credentials and resources into a complete configuration for GCP serverless function deployment. Manages cloud region settings, authentication, and resource allocation for the benchmarking suite. - + This class handles serialization/deserialization for cache persistence and provides validation for configuration consistency across sessions. 
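The credential-resolution priority described above can be summarized with a small sketch. It assumes stdlib only; the exact nesting of configuration keys and the semantics of the secret-credentials variable are illustrative, not the deserializer's actual logic.

    import os
    from typing import Optional

    def resolve_credentials(user_config: dict, cached: Optional[dict]) -> Optional[str]:
        # Priority: user-provided path, then cached path, then the standard
        # GOOGLE_APPLICATION_CREDENTIALS variable, then the SeBS-specific one.
        path = user_config.get("credentials", {}).get("credentials-json")
        if path:
            return path
        if cached and cached.get("credentials-json"):
            return cached["credentials-json"]
        return os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") or os.environ.get(
            "GCP_SECRET_APPLICATION_CREDENTIALS"
        )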
- + Attributes: _project_name: GCP project identifier _region: GCP region for resource deployment @@ -287,7 +288,7 @@ class GCPConfig(Config): def __init__(self, credentials: GCPCredentials, resources: GCPResources) -> None: """Initialize GCP configuration with credentials and resources. - + Args: credentials: GCP authentication credentials resources: GCP resource allocation settings @@ -299,7 +300,7 @@ def __init__(self, credentials: GCPCredentials, resources: GCPResources) -> None @property def region(self) -> str: """Get the GCP region for resource deployment. - + Returns: GCP region identifier (e.g., 'us-central1') """ @@ -308,7 +309,7 @@ def region(self) -> str: @property def project_name(self) -> str: """Get the GCP project name from credentials. - + Returns: GCP project identifier string """ @@ -317,7 +318,7 @@ def project_name(self) -> str: @property def credentials(self) -> GCPCredentials: """Get the GCP credentials instance. - + Returns: GCP authentication credentials """ @@ -326,7 +327,7 @@ def credentials(self) -> GCPCredentials: @property def resources(self) -> GCPResources: """Get the GCP resources instance. - + Returns: GCP resource allocation settings """ @@ -335,16 +336,16 @@ def resources(self) -> GCPResources: @staticmethod def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Config": """Deserialize GCP configuration from dictionary and cache. - + Loads complete GCP configuration including credentials and resources. Validates consistency between cached and provided configuration values, updating cache with new user-provided values when they differ. - + Args: config: Configuration dictionary with GCP settings cache: Cache instance for storing/retrieving configuration handlers: Logging handlers for status reporting - + Returns: Initialized GCPConfig instance """ @@ -382,7 +383,7 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Confi @staticmethod def initialize(cfg: Config, dct: Dict) -> None: """Initialize GCP configuration from dictionary. - + Args: cfg: Config instance to initialize (will be cast to GCPConfig) dct: Dictionary containing configuration values including region @@ -392,7 +393,7 @@ def initialize(cfg: Config, dct: Dict) -> None: def serialize(self) -> Dict: """Serialize configuration to dictionary for cache storage. - + Returns: Dictionary containing complete GCP configuration including name, region, credentials, and resources @@ -407,9 +408,9 @@ def serialize(self) -> Dict: def update_cache(self, cache: Cache) -> None: """Update cache with current configuration values. - + Updates region, credentials, and resources in the cache. - + Args: cache: Cache instance to update with configuration data """ diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index 8605e075..a5f2bdc3 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -30,10 +30,10 @@ @dataclass class BenchmarkResources: """Resource configuration for a benchmark's Datastore database. - + Tracks the allocated database name, table kinds, and client instance for a specific benchmark's NoSQL storage requirements. - + Attributes: database: Name of the Firestore database in Datastore mode kinds: List of entity kinds (table equivalents) in the database @@ -47,7 +47,7 @@ class BenchmarkResources: def serialize(self) -> Dict: """Serialize benchmark resources for cache storage. 
- + Returns: Dictionary containing database name and kinds list """ @@ -56,10 +56,10 @@ def serialize(self) -> Dict: @staticmethod def deserialize(config: Dict) -> "BenchmarkResources": """Deserialize benchmark resources from cached configuration. - + Args: config: Dictionary containing cached resource configuration - + Returns: BenchmarkResources instance with database and kinds """ @@ -68,20 +68,21 @@ def deserialize(config: Dict) -> "BenchmarkResources": class Datastore(NoSQLStorage): """Google Cloud Firestore/Datastore implementation for NoSQL storage. - + Provides NoSQL database functionality using Google Cloud Firestore in Datastore mode. Manages database allocation, entity kind creation, and data operations for benchmarks requiring NoSQL capabilities. - + Attributes: _cli_instance: gcloud CLI interface for database management _region: GCP region for database allocation _benchmark_resources: Mapping of benchmarks to their database resources """ + @staticmethod def typename() -> str: """Get the type name for this NoSQL storage implementation. - + Returns: Type name string for GCP Datastore """ @@ -90,7 +91,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment name for this NoSQL storage implementation. - + Returns: Deployment name string 'gcp' """ @@ -100,7 +101,7 @@ def __init__( self, cli_instance: GCloudCLI, cache_client: Cache, resources: Resources, region: str ) -> None: """Initialize Datastore NoSQL storage manager. - + Args: cli_instance: gcloud CLI interface for database operations cache_client: Cache instance for storing resource state @@ -116,13 +117,13 @@ def __init__( def get_tables(self, benchmark: str) -> Dict[str, str]: """Get table name mappings for a benchmark. - + GCP Datastore requires no table mappings as the entity kind name is the same as the benchmark table name. - + Args: benchmark: Name of the benchmark - + Returns: Empty dictionary (no mappings needed for GCP) """ @@ -130,11 +131,11 @@ def get_tables(self, benchmark: str) -> Dict[str, str]: def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: """Get the actual table name for a benchmark table. - + Args: benchmark: Name of the benchmark table: Logical table name - + Returns: Table name if it exists in benchmark resources, None otherwise """ @@ -149,10 +150,10 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: def retrieve_cache(self, benchmark: str) -> bool: """Retrieve benchmark resources from cache. - + Args: benchmark: Name of the benchmark to retrieve resources for - + Returns: True if resources were found in cache, False otherwise """ @@ -169,7 +170,7 @@ def retrieve_cache(self, benchmark: str) -> bool: def update_cache(self, benchmark: str) -> None: """Update cache with current benchmark resources. - + Args: benchmark: Name of the benchmark to cache resources for """ @@ -180,10 +181,10 @@ def update_cache(self, benchmark: str) -> None: def benchmark_database(self, benchmark: str) -> str: """Get the database name for a benchmark. - + Args: benchmark: Name of the benchmark - + Returns: Database name for the benchmark's NoSQL resources """ @@ -198,14 +199,14 @@ def write_to_table( secondary_key: Optional[Tuple[str, str]] = None, ) -> None: """Write data to a Datastore entity kind (table). 
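Purely as an illustration of the write path described above, and not the suite's own code, a hedged sketch using the google-cloud-datastore client; the composite entity name built from the two keys is an assumption about how write_to_table might combine them:

    from google.cloud import datastore

    def write_entity(project: str, kind: str, data: dict,
                     primary_key: tuple, secondary_key: tuple) -> None:
        # GCP requires a secondary key here, mirroring the AssertionError noted above.
        assert secondary_key is not None
        client = datastore.Client(project=project)
        # Assumption: the entity name combines both key values; the real naming
        # scheme is defined by Datastore.write_to_table.
        name = f"{primary_key[1]}-{secondary_key[1]}"
        entity = datastore.Entity(key=client.key(kind, name))
        entity.update({**data, primary_key[0]: primary_key[1], secondary_key[0]: secondary_key[1]})
        client.put(entity)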
- + Args: benchmark: Name of the benchmark table: Name of the table (entity kind) data: Dictionary of data to write primary_key: Primary key tuple (name, value) secondary_key: Secondary key tuple (name, value) - required for GCP - + Raises: AssertionError: If secondary_key is None (required for GCP) """ @@ -237,19 +238,19 @@ def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: """Create a new entity kind (table) in Datastore. - + Creates a new Firestore database in Datastore mode if needed, and adds the specified entity kind to the benchmark's resource configuration. - + Args: benchmark: Name of the benchmark name: Name of the entity kind (table) to create primary_key: Primary key field name _: Unused parameter for compatibility - + Returns: Name of the created entity kind - + Raises: RuntimeError: If database operations fail """ @@ -311,13 +312,13 @@ def create_table( def clear_table(self, name: str) -> str: """Clear all entities from a table. - + Args: name: Name of the table to clear - + Returns: Table name - + Raises: NotImplementedError: This method is not yet implemented """ @@ -325,13 +326,13 @@ def clear_table(self, name: str) -> str: def remove_table(self, name: str) -> str: """Remove a table from the database. - + Args: name: Name of the table to remove - + Returns: Table name - + Raises: NotImplementedError: This method is not yet implemented """ diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 242dbbbb..b9b21a90 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -23,13 +23,14 @@ class GCPFunction(Function): """Represents a Google Cloud Function with GCP-specific functionality. - + Extends the base Function class with GCP-specific features like bucket management for code storage and GCP-specific serialization/deserialization. - + Attributes: bucket: Cloud Storage bucket name containing the function's code """ + def __init__( self, name: str, @@ -39,7 +40,7 @@ def __init__( bucket: Optional[str] = None, ) -> None: """Initialize a GCP Cloud Function instance. - + Args: name: Function name on GCP benchmark: Name of the benchmark this function implements @@ -53,7 +54,7 @@ def __init__( @staticmethod def typename() -> str: """Get the type name for this function implementation. - + Returns: Type name string for GCP functions """ @@ -61,7 +62,7 @@ def typename() -> str: def serialize(self) -> Dict: """Serialize function to dictionary for cache storage. - + Returns: Dictionary containing function state including bucket information """ @@ -73,16 +74,16 @@ def serialize(self) -> Dict: @staticmethod def deserialize(cached_config: Dict) -> "GCPFunction": """Deserialize function from cached configuration. - + Reconstructs a GCPFunction instance from cached data including triggers and configuration. Handles both Library and HTTP triggers. - + Args: cached_config: Dictionary containing cached function configuration - + Returns: Reconstructed GCPFunction instance with triggers - + Raises: AssertionError: If an unknown trigger type is encountered """ @@ -108,14 +109,14 @@ def deserialize(cached_config: Dict) -> "GCPFunction": def code_bucket(self, benchmark: str, storage_client: GCPStorage) -> str: """Get or create the Cloud Storage bucket for function code. - + Returns the bucket name where the function's code is stored, creating a deployment bucket if none is assigned. 
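For readers unfamiliar with the underlying client, a small hedged sketch of the get-or-create bucket pattern that code_bucket relies on; the function name and region handling are illustrative, not the class's actual logic:

    from google.cloud import storage

    def get_or_create_bucket(name: str, region: str) -> str:
        client = storage.Client()
        # lookup_bucket returns None when the bucket does not exist.
        if client.lookup_bucket(name) is None:
            client.create_bucket(name, location=region)
        return name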
- + Args: benchmark: Benchmark name (unused but kept for compatibility) storage_client: GCP storage client for bucket operations - + Returns: Cloud Storage bucket name containing function code """ diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index b6215169..c13253cf 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -55,18 +55,18 @@ class GCP(System): """Google Cloud Platform serverless system implementation. - + Provides complete integration with Google Cloud Functions including deployment, monitoring, logging, and resource management. Handles code packaging, function lifecycle management, trigger creation, and performance metrics collection. - + Attributes: _config: GCP-specific configuration including credentials and region function_client: Google Cloud Functions API client cold_start_counter: Counter for enforcing cold starts in benchmarking logging_handlers: Logging configuration for status reporting """ - + def __init__( self, system_config: SeBSConfig, @@ -76,7 +76,7 @@ def __init__( logging_handlers: LoggingHandlers, ) -> None: """Initialize GCP serverless system. - + Args: system_config: General SeBS system configuration config: GCP-specific configuration with credentials and settings @@ -98,7 +98,7 @@ def __init__( @property def config(self) -> GCPConfig: """Get the GCP configuration instance. - + Returns: GCP configuration with credentials and region settings """ @@ -107,7 +107,7 @@ def config(self) -> GCPConfig: @staticmethod def name() -> str: """Get the platform name identifier. - + Returns: Platform name string 'gcp' """ @@ -116,7 +116,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the platform type name for display. - + Returns: Platform type string 'GCP' """ @@ -125,18 +125,20 @@ def typename() -> str: @staticmethod def function_type() -> "Type[Function]": """Get the function class type for this platform. - + Returns: GCPFunction class type """ return GCPFunction - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None) -> None: + def initialize( + self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None + ) -> None: """Initialize the GCP system for function deployment and management. - + Sets up the Cloud Functions API client and initializes system resources including storage buckets and other required infrastructure. - + Args: config: Additional system-specific configuration parameters resource_prefix: Optional prefix for resource naming to avoid conflicts @@ -146,7 +148,7 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] def get_function_client(self): """Get the Google Cloud Functions API client. - + Returns: Initialized Cloud Functions API client """ @@ -156,15 +158,15 @@ def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: """Generate a default function name for the given benchmark. - + Creates a standardized function name using resource ID, benchmark name, language, and version information. Formats the name according to GCP Cloud Functions naming requirements. - + Args: code_package: Benchmark package containing metadata resources: Optional resource configuration for ID generation - + Returns: Formatted function name suitable for GCP Cloud Functions """ @@ -181,14 +183,14 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: """Format function name according to GCP Cloud Functions requirements. 
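A minimal sketch of the renaming rule described above; the leading-letter guard and prefix are assumptions about how a non-letter start might be handled, not necessarily what format_function_name does:

    def gcp_safe_name(func_name: str) -> str:
        # GCP Cloud Functions names may only contain letters, digits and underscores.
        name = func_name.replace("-", "_").replace(".", "_")
        # Assumption: prefix names that do not start with a letter.
        if not name[0].isalpha():
            name = "function_" + name
        return name

    # e.g. gcp_safe_name("042.sebs-benchmark.python-3.9") -> "function_042_sebs_benchmark_python_3_9"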
- + Converts function names to comply with GCP naming rules by replacing hyphens and dots with underscores. GCP functions must begin with a letter and can only contain letters, numbers, and underscores. - + Args: func_name: Raw function name to format - + Returns: GCP-compliant function name """ @@ -209,17 +211,17 @@ def package_code( container_deployment: bool, ) -> Tuple[str, int, str]: """Package benchmark code for GCP Cloud Functions deployment. - + Transforms the benchmark code directory structure to meet GCP Cloud Functions requirements. Creates a zip archive with the appropriate handler file naming and directory structure for the specified language runtime. - + The packaging process: 1. Creates a 'function' subdirectory for benchmark sources 2. Renames handler files to GCP-required names (handler.py -> main.py) 3. Creates a zip archive for deployment 4. Restores original file structure - + Args: directory: Path to the benchmark code directory language_name: Programming language (python, nodejs) @@ -228,10 +230,10 @@ def package_code( benchmark: Benchmark name for archive naming is_cached: Whether this package is from cache container_deployment: Whether to use container deployment (unsupported) - + Returns: Tuple of (archive_path, archive_size_bytes, container_uri) - + Raises: NotImplementedError: If container_deployment is True """ @@ -295,20 +297,20 @@ def create_function( container_uri: str, ) -> "GCPFunction": """Create a new GCP Cloud Function or update existing one. - + Deploys a benchmark as a Cloud Function, handling code upload to Cloud Storage, function creation with proper configuration, and IAM policy setup for unauthenticated invocations. If the function already exists, updates it instead. - + Args: code_package: Benchmark package with code and configuration func_name: Name for the Cloud Function container_deployment: Whether to use container deployment (unsupported) container_uri: Container image URI (unused for GCP) - + Returns: GCPFunction instance representing the deployed function - + Raises: NotImplementedError: If container_deployment is True RuntimeError: If function creation or IAM configuration fails @@ -435,17 +437,17 @@ def create_function( def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: """Create a trigger for the given function. - + Creates HTTP triggers for Cloud Functions, waiting for function deployment to complete before extracting the trigger URL. - + Args: function: Function instance to create trigger for trigger_type: Type of trigger to create (only HTTP supported) - + Returns: Created trigger instance with URL and configuration - + Raises: RuntimeError: If trigger type is not supported """ @@ -481,10 +483,10 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def cached_function(self, function: Function) -> None: """Configure a cached function instance for use. - + Sets up library triggers for functions loaded from cache, ensuring they have the proper deployment client and logging configuration. - + Args: function: Cached function instance to configure """ @@ -505,17 +507,17 @@ def update_function( container_uri: str, ) -> None: """Update an existing Cloud Function with new code and configuration. - + Uploads new code package to Cloud Storage and patches the existing function with updated runtime, memory, timeout, and environment variables. Waits for deployment to complete before returning. 
- + Args: function: Existing function instance to update code_package: New benchmark package with updated code container_deployment: Whether to use container deployment (unsupported) container_uri: Container image URI (unused) - + Raises: NotImplementedError: If container_deployment is True RuntimeError: If function update fails after maximum retries @@ -582,14 +584,14 @@ def update_function( def _update_envs(self, full_function_name: str, envs: Dict) -> Dict: """Merge new environment variables with existing function environment. - + Retrieves current function environment variables and merges them with new variables, with new variables taking precedence on conflicts. - + Args: full_function_name: Fully qualified function name envs: New environment variables to add/update - + Returns: Merged environment variables dictionary """ @@ -608,13 +610,13 @@ def _update_envs(self, full_function_name: str, envs: Dict) -> Dict: def _generate_function_envs(self, code_package: Benchmark) -> Dict: """Generate environment variables for function based on benchmark requirements. - + Creates environment variables needed by the benchmark, such as NoSQL database connection information. - + Args: code_package: Benchmark package with module requirements - + Returns: Dictionary of environment variables for the function """ @@ -635,18 +637,18 @@ def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: Dict = {} ) -> int: """Update function configuration including memory, timeout, and environment. - + Updates the Cloud Function's memory allocation, timeout, and environment variables without changing the code. Waits for deployment to complete. - + Args: function: Function instance to update code_package: Benchmark package with configuration requirements env_variables: Additional environment variables to set - + Returns: Version ID of the updated function - + Raises: RuntimeError: If configuration update fails after maximum retries """ @@ -723,12 +725,12 @@ def update_function_configuration( @staticmethod def get_full_function_name(project_name: str, location: str, func_name: str) -> str: """Generate the fully qualified function name for GCP API calls. - + Args: project_name: GCP project ID location: GCP region/location func_name: Function name - + Returns: Fully qualified function name in GCP format """ @@ -736,12 +738,12 @@ def get_full_function_name(project_name: str, location: str, func_name: str) -> def prepare_experiment(self, benchmark: str) -> str: """Prepare storage resources for benchmark experiment. - + Creates a dedicated storage bucket for experiment logs and outputs. - + Args: benchmark: Name of the benchmark being prepared - + Returns: Name of the created logs storage bucket """ @@ -752,7 +754,7 @@ def prepare_experiment(self, benchmark: str) -> str: def shutdown(self) -> None: """Shutdown the GCP system and clean up resources. - + Performs cleanup of system resources and calls parent shutdown method. """ cast(GCPSystemResources, self._system_resources).shutdown() @@ -762,11 +764,11 @@ def download_metrics( self, function_name: str, start_time: int, end_time: int, requests: Dict, metrics: Dict ) -> None: """Download execution metrics and logs from GCP monitoring services. - + Retrieves function execution times from Cloud Logging and performance metrics from Cloud Monitoring. Processes logs to extract execution times and collects metrics like memory usage and network egress. 
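To illustrate the Cloud Logging side of the metric collection described above, a hedged sketch of listing execution log entries for one function; the filter string follows the usual cloud_function resource labels and only approximates what the suite queries:

    from google.cloud import logging as gcp_logging

    def list_execution_entries(function_name: str, start_rfc3339: str, end_rfc3339: str):
        client = gcp_logging.Client()
        log_filter = (
            'resource.type="cloud_function" '
            f'resource.labels.function_name="{function_name}" '
            f'timestamp>="{start_rfc3339}" timestamp<="{end_rfc3339}"'
        )
        # Entries reporting "Function execution took ... ms" carry the execution time.
        return list(client.list_entries(filter_=log_filter))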
- + Args: function_name: Name of the function to collect metrics for start_time: Start timestamp for metric collection (Unix timestamp) @@ -891,14 +893,14 @@ def wrapper(gen): def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> int: """Force a cold start by updating function configuration. - + Triggers a cold start by updating the function's environment variables with a unique counter value, forcing GCP to create a new instance. - + Args: function: Function instance to enforce cold start on code_package: Benchmark package for configuration - + Returns: Version ID of the updated function """ @@ -912,10 +914,10 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> in def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: """Enforce cold starts for multiple functions simultaneously. - + Updates all provided functions to force cold starts and waits for all deployments to complete before returning. - + Args: functions: List of functions to enforce cold starts on code_package: Benchmark package for configuration @@ -947,14 +949,14 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) def get_functions(self, code_package: Benchmark, function_names: List[str]) -> List["Function"]: """Retrieve multiple function instances and ensure they are deployed. - + Gets function instances for the provided names and waits for all functions to be in ACTIVE deployment state. - + Args: code_package: Benchmark package for function creation function_names: List of function names to retrieve - + Returns: List of deployed function instances """ @@ -988,11 +990,11 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: """Check if a function is deployed and optionally verify version. - + Args: func_name: Name of the function to check versionId: Optional specific version ID to verify (-1 to check any) - + Returns: Tuple of (is_deployed, current_version_id) """ @@ -1007,10 +1009,10 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: def deployment_version(self, func: Function) -> int: """Get the current deployment version ID of a function. - + Args: func: Function instance to check - + Returns: Current version ID of the function """ @@ -1023,10 +1025,10 @@ def deployment_version(self, func: Function) -> int: @staticmethod def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile) -> None: """Recursively add files and directories to a zip archive. - + Helper method for recursive_zip that handles directory traversal and adds files with relative paths to the archive. - + Args: base_directory: Base directory path for relative path calculation path: Current path being processed (file or directory) @@ -1044,14 +1046,14 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile) -> None @staticmethod def recursive_zip(directory: str, archname: str) -> bool: """Create a zip archive of a directory with relative paths. - + Creates a compressed zip archive of the specified directory, preserving the relative directory structure. Uses maximum compression level. 
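The archiving behaviour described above boils down to the following standalone sketch (ZIP_DEFLATED at the highest compression level, relative arcnames); it is an equivalent illustration, not the method itself:

    import os
    import zipfile

    def zip_directory(directory: str, archive_path: str) -> None:
        with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED, compresslevel=9) as archive:
            for root, _, files in os.walk(directory):
                for fname in files:
                    full_path = os.path.join(root, fname)
                    # Store paths relative to the zipped directory.
                    archive.write(full_path, arcname=os.path.relpath(full_path, directory))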
- + Args: directory: Absolute path to the directory to be zipped archname: Path where the zip file should be created - + Returns: True if archiving was successful """ diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index e9d85ac1..df6a9b2b 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -32,11 +32,11 @@ class GCPSystemResources(SystemResources): """System resource manager for Google Cloud Platform services. - + Manages and provides access to all GCP services required for serverless benchmarking including Cloud Storage, Firestore/Datastore, and gcloud CLI. Handles resource initialization, configuration, and cleanup. - + Attributes: _storage: Cloud Storage instance for object storage _nosql_storage: Datastore instance for NoSQL operations @@ -44,10 +44,11 @@ class GCPSystemResources(SystemResources): _system_config: SeBS system configuration _logging_handlers: Logging configuration """ + @staticmethod def typename() -> str: """Get the type name for this resource manager. - + Returns: Type name string for GCP system resources """ @@ -56,7 +57,7 @@ def typename() -> str: @property def config(self) -> GCPConfig: """Get the GCP configuration instance. - + Returns: GCP configuration with credentials and settings """ @@ -71,7 +72,7 @@ def __init__( logger_handlers: LoggingHandlers, ) -> None: """Initialize GCP system resources manager. - + Args: system_config: SeBS system configuration config: GCP-specific configuration @@ -89,14 +90,14 @@ def __init__( def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: """Get or create the Cloud Storage instance. - + Provides access to Google Cloud Storage for persistent object storage. Creates the storage instance if it doesn't exist, or updates the replace_existing setting if provided. - + Args: replace_existing: Whether to replace existing benchmark input data - + Returns: Initialized GCP storage instance """ @@ -114,10 +115,10 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: def get_nosql_storage(self) -> Datastore: """Get or create the Datastore instance for NoSQL operations. - + Provides access to Google Cloud Firestore/Datastore for NoSQL database operations required by benchmarks. - + Returns: Initialized Datastore instance """ @@ -130,10 +131,10 @@ def get_nosql_storage(self) -> Datastore: @property def cli_instance(self) -> GCloudCLI: """Get or create the gcloud CLI instance. - + Provides access to a containerized gcloud CLI for administrative operations. Initializes and authenticates the CLI if needed. - + Returns: Authenticated gcloud CLI instance """ @@ -148,9 +149,9 @@ def cli_instance(self) -> GCloudCLI: def initialize_cli(self, cli: GCloudCLI) -> None: """Initialize with an existing CLI instance. - + Uses a pre-configured CLI instance instead of creating a new one. - + Args: cli: Pre-configured gcloud CLI instance """ @@ -159,7 +160,7 @@ def initialize_cli(self, cli: GCloudCLI) -> None: def shutdown(self) -> None: """Shutdown system resources and clean up. - + Stops the gcloud CLI container if it was created by this instance. """ if self._cli_instance and self._cli_instance_stop: diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 772c0491..1f532c62 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -30,19 +30,20 @@ class GCPStorage(PersistentStorage): """Google Cloud Storage implementation for SeBS persistent storage. - + Provides object storage capabilities using Google Cloud Storage buckets. 
Handles bucket creation, file operations, and storage resource management for benchmarks, deployment artifacts, and experiment outputs. - + Attributes: client: Google Cloud Storage client instance cached: Whether storage operations use cached data """ + @staticmethod def typename() -> str: """Get the type name for this storage implementation. - + Returns: Type name string for GCP storage """ @@ -51,7 +52,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment name for this storage implementation. - + Returns: Deployment name string 'gcp' """ @@ -69,7 +70,7 @@ def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool ) -> None: """Initialize GCP Storage client. - + Args: region: GCP region for storage resources cache_client: Cache instance for storing storage state @@ -83,23 +84,25 @@ def __init__( def correct_name(self, name: str) -> str: """Correct bucket name to meet GCP naming requirements. - + Args: name: Original bucket name - + Returns: Corrected bucket name (no changes needed for GCP) """ return name - def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False) -> str: + def _create_bucket( + self, name: str, buckets: List[str] = [], randomize_name: bool = False + ) -> str: """Create a new Cloud Storage bucket or return existing one. - + Args: name: Base name for the bucket buckets: List of existing bucket names to check randomize_name: Whether to append random suffix to avoid name conflicts - + Returns: Name of the created or existing bucket """ @@ -126,7 +129,7 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo def download(self, bucket_name: str, key: str, filepath: str) -> None: """Download a file from Cloud Storage. - + Args: bucket_name: Name of the storage bucket key: Object key/path in the bucket @@ -139,7 +142,7 @@ def download(self, bucket_name: str, key: str, filepath: str) -> None: def upload(self, bucket_name: str, filepath: str, key: str) -> None: """Upload a file to Cloud Storage. - + Args: bucket_name: Name of the storage bucket filepath: Local file path to upload @@ -153,10 +156,10 @@ def upload(self, bucket_name: str, filepath: str, key: str) -> None: def exists_bucket(self, bucket_name: str) -> bool: """Check if a Cloud Storage bucket exists. - + Args: bucket_name: Name of the bucket to check - + Returns: True if bucket exists and is accessible, False otherwise """ @@ -168,11 +171,11 @@ def exists_bucket(self, bucket_name: str) -> bool: def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: """List objects in a Cloud Storage bucket with optional prefix filter. - + Args: bucket_name: Name of the bucket to list prefix: Optional prefix to filter objects - + Returns: List of object names in the bucket matching the prefix """ @@ -183,10 +186,10 @@ def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: """List Cloud Storage buckets, optionally filtered by name. - + Args: bucket_name: Optional bucket name filter - + Returns: List of bucket names, filtered if bucket_name is provided """ @@ -199,7 +202,7 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def remove_bucket(self, bucket_name: str) -> None: """Remove a Cloud Storage bucket. 
- + Args: bucket_name: Name of the bucket to remove """ @@ -207,10 +210,10 @@ def remove_bucket(self, bucket_name: str) -> None: def clean_bucket(self, bucket: str) -> None: """Clean all objects from a Cloud Storage bucket. - + Args: bucket: Name of the bucket to clean - + Raises: NotImplementedError: This method is not yet implemented """ @@ -218,10 +221,10 @@ def clean_bucket(self, bucket: str) -> None: def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: """Upload function for batch operations with caching support. - + Uploads a file to the appropriate benchmark bucket, respecting cache settings and replace_existing configuration. - + Args: path_idx: Index of the input path prefix key: Object key for the uploaded file diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index ac0fd2c7..9ac4b239 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -32,18 +32,19 @@ class LibraryTrigger(Trigger): """Direct Cloud Functions API trigger for synchronous invocation. - + Uses the Google Cloud Functions API to invoke functions directly through the cloud functions client. Provides precise execution timing and error handling. Waits for function deployment before invocation. - + Attributes: name: Function name to invoke _deployment_client: GCP client for API operations """ + def __init__(self, fname: str, deployment_client: Optional[GCP] = None) -> None: """Initialize library trigger for direct function invocation. - + Args: fname: Name of the Cloud Function to invoke deployment_client: Optional GCP client for API operations @@ -55,7 +56,7 @@ def __init__(self, fname: str, deployment_client: Optional[GCP] = None) -> None: @staticmethod def typename() -> str: """Get the type name for this trigger implementation. - + Returns: Type name string for library triggers """ @@ -64,10 +65,10 @@ def typename() -> str: @property def deployment_client(self) -> GCP: """Get the GCP deployment client. - + Returns: GCP client instance for API operations - + Raises: AssertionError: If deployment client is not set """ @@ -77,7 +78,7 @@ def deployment_client(self) -> GCP: @deployment_client.setter def deployment_client(self, deployment_client: GCP) -> None: """Set the GCP deployment client. - + Args: deployment_client: GCP client instance """ @@ -86,7 +87,7 @@ def deployment_client(self, deployment_client: GCP) -> None: @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type for this implementation. - + Returns: Library trigger type enum value """ @@ -94,13 +95,13 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: Dict) -> ExecutionResult: """Synchronously invoke the Cloud Function using the API. - + Waits for function deployment, then invokes via Cloud Functions API. Measures execution time and handles errors. - + Args: payload: Input data to send to the function - + Returns: ExecutionResult with timing, output, and error information """ @@ -145,10 +146,10 @@ def sync_invoke(self, payload: Dict) -> ExecutionResult: def async_invoke(self, payload: Dict): """Asynchronously invoke the Cloud Function. - + Args: payload: Input data to send to the function - + Raises: NotImplementedError: Async invocation not implemented for library triggers """ @@ -156,7 +157,7 @@ def async_invoke(self, payload: Dict): def serialize(self) -> Dict: """Serialize trigger to dictionary for cache storage. 
- + Returns: Dictionary containing trigger type and name """ @@ -165,10 +166,10 @@ def serialize(self) -> Dict: @staticmethod def deserialize(obj: Dict) -> Trigger: """Deserialize trigger from cached configuration. - + Args: obj: Dictionary containing serialized trigger data - + Returns: Reconstructed LibraryTrigger instance """ @@ -177,16 +178,17 @@ def deserialize(obj: Dict) -> Trigger: class HTTPTrigger(Trigger): """HTTP endpoint trigger for Cloud Functions invocation. - + Invokes Cloud Functions through their HTTP endpoints, supporting both synchronous and asynchronous execution patterns using HTTP requests. - + Attributes: url: HTTP endpoint URL for the Cloud Function """ + def __init__(self, url: str) -> None: """Initialize HTTP trigger with function endpoint URL. - + Args: url: HTTP endpoint URL for the Cloud Function """ @@ -196,7 +198,7 @@ def __init__(self, url: str) -> None: @staticmethod def typename() -> str: """Get the type name for this trigger implementation. - + Returns: Type name string for HTTP triggers """ @@ -205,7 +207,7 @@ def typename() -> str: @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type for this implementation. - + Returns: HTTP trigger type enum value """ @@ -213,10 +215,10 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: Dict) -> ExecutionResult: """Synchronously invoke the Cloud Function via HTTP. - + Args: payload: Input data to send to the function - + Returns: ExecutionResult from the HTTP invocation """ @@ -226,10 +228,10 @@ def sync_invoke(self, payload: Dict) -> ExecutionResult: def async_invoke(self, payload: Dict) -> concurrent.futures.Future: """Asynchronously invoke the Cloud Function via HTTP. - + Args: payload: Input data to send to the function - + Returns: Future object for the async HTTP invocation """ @@ -239,7 +241,7 @@ def async_invoke(self, payload: Dict) -> concurrent.futures.Future: def serialize(self) -> Dict: """Serialize trigger to dictionary for cache storage. - + Returns: Dictionary containing trigger type and URL """ @@ -248,10 +250,10 @@ def serialize(self) -> Dict: @staticmethod def deserialize(obj: Dict) -> Trigger: """Deserialize trigger from cached configuration. - + Args: obj: Dictionary containing serialized trigger data - + Returns: Reconstructed HTTPTrigger instance """ diff --git a/sebs/local/config.py b/sebs/local/config.py index 78a54074..705b7f29 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -22,15 +22,15 @@ class LocalCredentials(Credentials): """Credentials class for local execution platform. - + The local platform doesn't require any authentication credentials since functions run locally using Docker containers. This class provides the required interface with empty implementations. """ - + def serialize(self) -> dict: """Serialize credentials to dictionary. - + Returns: dict: Empty dictionary as no credentials are required for local execution """ @@ -39,12 +39,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: """Deserialize credentials from configuration. 
- + Args: config: Configuration dictionary (unused for local) - cache: Cache client (unused for local) + cache: Cache client (unused for local) handlers: Logging handlers (unused for local) - + Returns: LocalCredentials: New instance of local credentials """ @@ -59,23 +59,23 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden class LocalResources(SelfHostedResources): """Resource management for local execution platform. - + Manages resources for local function execution, including port allocation for Docker containers and storage configurations. Tracks allocated ports to avoid conflicts when running multiple functions. - + Attributes: _path: Path for local resource storage _allocated_ports: Set of ports currently allocated to containers """ - + def __init__( self, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): """Initialize local resources. - + Args: storage_cfg: Optional persistent storage configuration nosql_storage_cfg: Optional NoSQL storage configuration @@ -87,7 +87,7 @@ def __init__( @property def allocated_ports(self) -> set: """Get the set of allocated ports. - + Returns: set: Set of port numbers currently allocated to containers """ @@ -95,7 +95,7 @@ def allocated_ports(self) -> set: def serialize(self) -> dict: """Serialize resources to dictionary. - + Returns: dict: Dictionary containing resource configuration including allocated ports """ @@ -107,7 +107,7 @@ def serialize(self) -> dict: @staticmethod def initialize(res: Resources, config: dict) -> None: """Initialize resources from configuration. - + Args: res: Resources instance to initialize config: Configuration dictionary containing resource settings @@ -119,7 +119,7 @@ def initialize(res: Resources, config: dict) -> None: def update_cache(self, cache: Cache) -> None: """Update cache with current resource state. - + Args: cache: Cache client to update """ @@ -131,12 +131,12 @@ def update_cache(self, cache: Cache) -> None: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: """Deserialize resources from configuration. - + Args: config: Configuration dictionary cache: Cache client for loading cached resources handlers: Logging handlers for resource logging - + Returns: LocalResources: Initialized local resources instance """ @@ -160,16 +160,16 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class LocalConfig(Config): """Configuration class for local execution platform. - + Provides the main configuration interface for the local platform, combining credentials and resources. The local platform requires minimal configuration since it runs functions locally. - + Attributes: _credentials: Local credentials instance (empty) _resources: Local resources instance for port management """ - + def __init__(self): """Initialize local configuration.""" super().__init__(name="local") @@ -179,7 +179,7 @@ def __init__(self): @staticmethod def typename() -> str: """Get the type name for this configuration. - + Returns: str: Type name "Local.Config" """ @@ -188,11 +188,11 @@ def typename() -> str: @staticmethod def initialize(cfg: Config, dct: dict) -> None: """Initialize configuration from dictionary. 
- + Args: cfg: Configuration instance to initialize dct: Dictionary containing configuration data - + Note: No initialization needed for local platform """ @@ -201,7 +201,7 @@ def initialize(cfg: Config, dct: dict) -> None: @property def credentials(self) -> LocalCredentials: """Get the local credentials. - + Returns: LocalCredentials: The credentials instance """ @@ -210,7 +210,7 @@ def credentials(self) -> LocalCredentials: @property def resources(self) -> LocalResources: """Get the local resources. - + Returns: LocalResources: The resources instance """ @@ -219,7 +219,7 @@ def resources(self) -> LocalResources: @resources.setter def resources(self, val: LocalResources) -> None: """Set the local resources. - + Args: val: New resources instance """ @@ -228,12 +228,12 @@ def resources(self, val: LocalResources) -> None: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: """Deserialize configuration from dictionary. - + Args: config: Configuration dictionary cache: Cache client for loading cached configuration handlers: Logging handlers for configuration logging - + Returns: LocalConfig: Initialized local configuration instance """ @@ -246,7 +246,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config def serialize(self) -> dict: """Serialize configuration to dictionary. - + Returns: dict: Dictionary containing configuration data """ @@ -255,7 +255,7 @@ def serialize(self) -> dict: def update_cache(self, cache: Cache) -> None: """Update cache with current configuration. - + Args: cache: Cache client to update """ diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index 92ede889..76895e6e 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -30,11 +30,11 @@ class Deployment(LoggingBase): """Manages local function deployments and memory measurements. - + Coordinates the lifecycle of locally deployed functions, including container management, memory measurement collection, and resource cleanup. Handles serialization of deployment state for persistence and recovery. - + Attributes: _functions: List of deployed local functions _storage: Optional Minio storage instance @@ -42,11 +42,11 @@ class Deployment(LoggingBase): _memory_measurement_pids: PIDs of memory measurement processes _measurement_file: Path to memory measurement output file """ - + @property def measurement_file(self) -> Optional[str]: """Get the path to the memory measurement file. - + Returns: Optional[str]: Path to measurement file, or None if not set """ @@ -55,7 +55,7 @@ def measurement_file(self) -> Optional[str]: @measurement_file.setter def measurement_file(self, val: Optional[str]) -> None: """Set the path to the memory measurement file. - + Args: val: Path to measurement file, or None to unset """ @@ -72,7 +72,7 @@ def __init__(self): def add_function(self, func: LocalFunction) -> None: """Add a function to the deployment. - + Args: func: Local function to add to the deployment """ @@ -82,7 +82,7 @@ def add_function(self, func: LocalFunction) -> None: def add_input(self, func_input: dict) -> None: """Add function input configuration to the deployment. - + Args: func_input: Dictionary containing function input configuration """ @@ -90,7 +90,7 @@ def add_input(self, func_input: dict) -> None: def set_storage(self, storage: Minio) -> None: """Set the storage instance for the deployment. 
- + Args: storage: Minio storage instance to use """ @@ -98,7 +98,7 @@ def set_storage(self, storage: Minio) -> None: def serialize(self, path: str) -> None: """Serialize deployment configuration to file. - + Args: path: File path to write serialized deployment configuration """ @@ -120,14 +120,14 @@ def serialize(self, path: str) -> None: @staticmethod def deserialize(path: str, cache_client: Cache) -> "Deployment": """Deserialize deployment configuration from file. - + Args: path: File path to read serialized deployment configuration cache_client: Cache client for loading cached resources - + Returns: Deployment: Deserialized deployment instance - + Note: This method may be deprecated - check if still in use """ @@ -148,11 +148,11 @@ def deserialize(path: str, cache_client: Cache) -> "Deployment": def shutdown(self, output_json: str) -> None: """Shutdown the deployment and collect memory measurements. - + Terminates all memory measurement processes, processes measurement data, and stops all function containers. Memory measurements are aggregated and written to the specified output file. - + Args: output_json: Path to write memory measurement results """ diff --git a/sebs/local/function.py b/sebs/local/function.py index 838773ae..11546517 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -20,17 +20,17 @@ class HTTPTrigger(Trigger): """HTTP trigger for local function invocation. - + Provides HTTP-based triggering for functions running in local Docker containers. Supports both synchronous and asynchronous invocation patterns. - + Attributes: url: HTTP URL endpoint for function invocation """ - + def __init__(self, url: str): """Initialize HTTP trigger. - + Args: url: HTTP URL endpoint for the function """ @@ -40,7 +40,7 @@ def __init__(self, url: str): @staticmethod def typename() -> str: """Get the type name for this trigger. - + Returns: str: Type name "Local.HTTPTrigger" """ @@ -49,7 +49,7 @@ def typename() -> str: @staticmethod def trigger_type() -> Trigger.TriggerType: """Get the trigger type. - + Returns: Trigger.TriggerType: HTTP trigger type """ @@ -57,10 +57,10 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: dict) -> ExecutionResult: """Synchronously invoke the function via HTTP. - + Args: payload: Function input payload as dictionary - + Returns: ExecutionResult: Result of the function execution """ @@ -69,10 +69,10 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict) -> concurrent.futures.Future: """Asynchronously invoke the function via HTTP. - + Args: payload: Function input payload as dictionary - + Returns: concurrent.futures.Future: Future object for the execution result """ @@ -82,7 +82,7 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: """Serialize trigger configuration to dictionary. - + Returns: dict: Dictionary containing trigger type and URL """ @@ -91,10 +91,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: """Deserialize trigger from dictionary. - + Args: obj: Dictionary containing trigger configuration - + Returns: HTTPTrigger: Deserialized HTTP trigger instance """ @@ -103,11 +103,11 @@ def deserialize(obj: dict) -> Trigger: class LocalFunction(Function): """Function implementation for local execution platform. - + Represents a serverless function running locally in a Docker container. Handles container management, URL resolution, and memory measurement process tracking. 
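As a hedged illustration of the URL resolution mentioned above, resolving a container's address with the Docker SDK might look like this; the default bridge-network assumption and the port value are illustrative:

    import docker

    def function_url(container_id: str, port: int = 9000) -> str:
        client = docker.from_env()
        container = client.containers.get(container_id)
        container.reload()  # refresh attrs so the network settings are current
        ip_address = container.attrs["NetworkSettings"]["IPAddress"]
        if not ip_address:
            raise RuntimeError(f"Could not determine IP address of container {container_id}")
        return f"http://{ip_address}:{port}"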
- + Attributes: _instance: Docker container running the function _instance_id: Container ID for the function @@ -115,7 +115,7 @@ class LocalFunction(Function): _url: Complete URL for function invocation _measurement_pid: Optional PID of memory measurement process """ - + def __init__( self, docker_container, @@ -127,7 +127,7 @@ def __init__( measurement_pid: Optional[int] = None, ): """Initialize local function. - + Args: docker_container: Docker container instance running the function port: Port number the function is listening on @@ -136,7 +136,7 @@ def __init__( code_package_hash: Hash of the function code package config: Function configuration measurement_pid: Optional PID of memory measurement process - + Raises: RuntimeError: If container IP address cannot be determined """ @@ -167,7 +167,7 @@ def __init__( @property def container(self) -> docker.models.containers.Container: """Get the Docker container running this function. - + Returns: docker.models.containers.Container: The Docker container instance """ @@ -176,7 +176,7 @@ def container(self) -> docker.models.containers.Container: @container.setter def container(self, instance: docker.models.containers.Container) -> None: """Set the Docker container for this function. - + Args: instance: New Docker container instance """ @@ -185,7 +185,7 @@ def container(self, instance: docker.models.containers.Container) -> None: @property def url(self) -> str: """Get the URL for function invocation. - + Returns: str: HTTP URL for invoking the function """ @@ -194,7 +194,7 @@ def url(self) -> str: @property def memory_measurement_pid(self) -> Optional[int]: """Get the PID of the memory measurement process. - + Returns: Optional[int]: PID of memory measurement process, or None if not measuring """ @@ -203,7 +203,7 @@ def memory_measurement_pid(self) -> Optional[int]: @staticmethod def typename() -> str: """Get the type name for this function. - + Returns: str: Type name "Local.LocalFunction" """ @@ -211,7 +211,7 @@ def typename() -> str: def serialize(self) -> dict: """Serialize function configuration to dictionary. - + Returns: dict: Dictionary containing function configuration including container details """ @@ -225,13 +225,13 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LocalFunction": """Deserialize function from cached configuration. - + Args: cached_config: Dictionary containing cached function configuration - + Returns: LocalFunction: Deserialized function instance - + Raises: RuntimeError: If cached container is no longer available """ @@ -252,7 +252,7 @@ def deserialize(cached_config: dict) -> "LocalFunction": def stop(self) -> None: """Stop the function container. - + Stops the Docker container running this function with immediate timeout. """ self.logging.info(f"Stopping function container {self._instance_id}") diff --git a/sebs/local/local.py b/sebs/local/local.py index 80d40c43..415793c1 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -40,17 +40,17 @@ class Local(System): """Local execution platform implementation. - + Implements the System interface for running serverless functions locally using Docker containers. Provides development and testing capabilities without requiring cloud platform deployment. 
- + This platform supports: - HTTP-triggered function execution - Memory measurement and profiling - Multi-container deployment - Cross-platform operation (Linux, macOS, Windows) - + Attributes: DEFAULT_PORT: Default port number for function containers (9000) _config: Local platform configuration @@ -64,7 +64,7 @@ class Local(System): @staticmethod def name() -> str: """Get the platform name. - + Returns: str: Platform name "local" """ @@ -73,7 +73,7 @@ def name() -> str: @staticmethod def typename() -> str: """Get the platform type name. - + Returns: str: Type name "Local" """ @@ -82,7 +82,7 @@ def typename() -> str: @staticmethod def function_type() -> "Type[Function]": """Get the function type for this platform. - + Returns: Type[Function]: LocalFunction class """ @@ -91,7 +91,7 @@ def function_type() -> "Type[Function]": @property def config(self) -> LocalConfig: """Get the local platform configuration. - + Returns: LocalConfig: The platform configuration """ @@ -100,7 +100,7 @@ def config(self) -> LocalConfig: @property def remove_containers(self) -> bool: """Get whether containers are automatically removed. - + Returns: bool: True if containers are removed after use """ @@ -109,7 +109,7 @@ def remove_containers(self) -> bool: @remove_containers.setter def remove_containers(self, val: bool) -> None: """Set whether containers are automatically removed. - + Args: val: Whether to remove containers after use """ @@ -118,7 +118,7 @@ def remove_containers(self, val: bool) -> None: @property def measure_interval(self) -> int: """Get the memory measurement interval. - + Returns: int: Measurement interval in milliseconds, -1 if disabled """ @@ -127,7 +127,7 @@ def measure_interval(self) -> int: @property def measurements_enabled(self) -> bool: """Check if memory measurements are enabled. - + Returns: bool: True if measurements are enabled """ @@ -136,7 +136,7 @@ def measurements_enabled(self) -> bool: @property def measurement_path(self) -> Optional[str]: """Get the path to the memory measurement file. - + Returns: Optional[str]: Path to measurement file, or None if not set """ @@ -151,7 +151,7 @@ def __init__( logger_handlers: LoggingHandlers, ): """Initialize the local execution platform. - + Args: sebs_config: Global SeBS configuration config: Local platform configuration @@ -178,7 +178,7 @@ def __init__( def shutdown(self) -> None: """Shut down the local platform. - + Performs cleanup operations including shutting down any storage instances. """ super().shutdown() @@ -194,18 +194,18 @@ def package_code( container_deployment: bool, ) -> Tuple[str, int, str]: """Package function code for local execution. - + Creates a compatible code package structure for local execution that maintains compatibility across cloud providers. Reorganizes files into a module structure to handle relative imports properly. - + The packaging creates this structure: - function/ - function.py - storage.py - resources/ - handler.py - + Args: directory: Directory containing the function code language_name: Programming language (e.g., "python", "nodejs") @@ -214,7 +214,7 @@ def package_code( benchmark: Benchmark name is_cached: Whether the package is from cache container_deployment: Whether using container deployment - + Returns: Tuple[str, int, str]: (package_path, size_bytes, deployment_package_uri) """ @@ -242,19 +242,19 @@ def _start_container( self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] ) -> LocalFunction: """Start a Docker container for function execution. 
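Given the HTTP triggers and the default port 9000 described above, invoking a locally deployed function is just an HTTP request; a minimal usage sketch in which the URL and payload are placeholders rather than values produced by the suite:

    import requests

    payload = {"size": "test"}  # placeholder benchmark input
    response = requests.post("http://172.17.0.2:9000", json=payload, timeout=60)
    print(response.status_code, response.json())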
- + Creates and starts a Docker container running the function code. Handles port allocation, environment setup, volume mounting, and health checking. Optionally starts memory measurement processes. - + Args: code_package: Benchmark code package to deploy func_name: Name of the function func: Optional existing function to update (for restarts) - + Returns: LocalFunction: Running function instance - + Raises: RuntimeError: If port allocation fails or container won't start """ @@ -402,16 +402,16 @@ def create_function( container_uri: str, ) -> "LocalFunction": """Create a new function deployment. - + Args: code_package: Benchmark code package to deploy func_name: Name for the function container_deployment: Whether to use container deployment (unsupported) container_uri: Container URI (unused for local) - + Returns: LocalFunction: Created function instance - + Raises: NotImplementedError: If container deployment is requested """ @@ -427,9 +427,9 @@ def update_function( container_uri: str, ) -> None: """Update an existing function with new code. - + Stops the existing container and starts a new one with updated code. - + Args: function: Existing function to update code_package: New benchmark code package @@ -443,16 +443,16 @@ def update_function( def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: """Create a trigger for function invocation. - + For local functions, only HTTP triggers are supported. - + Args: func: Function to create trigger for trigger_type: Type of trigger to create - + Returns: Trigger: Created trigger instance - + Raises: RuntimeError: If trigger type is not HTTP """ @@ -471,9 +471,9 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T def cached_function(self, function: Function) -> None: """Handle cached function setup. - + For local functions, no special handling is needed for cached functions. - + Args: function: Cached function instance """ @@ -481,11 +481,11 @@ def cached_function(self, function: Function) -> None: def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: """Update function configuration. - + Args: function: Function to update code_package: Benchmark code package - + Raises: RuntimeError: Always raised as configuration updates are not supported """ @@ -501,9 +501,9 @@ def download_metrics( metrics: dict, ) -> None: """Download execution metrics. - + For local execution, metrics are not available from the platform. - + Args: function_name: Name of the function start_time: Start time for metrics collection @@ -515,11 +515,11 @@ def download_metrics( def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: """Enforce cold start for functions. - + Args: functions: List of functions to enforce cold start on code_package: Benchmark code package - + Raises: NotImplementedError: Cold start enforcement is not implemented for local """ @@ -530,13 +530,13 @@ def default_function_name( code_package: Benchmark, resources: Optional[Resources] = None ) -> str: """Generate default function name. - + Creates a standardized function name based on the code package and resources. - + Args: code_package: Benchmark code package resources: Optional resources instance for ID inclusion - + Returns: str: Generated function name """ @@ -559,12 +559,12 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: """Format function name for platform requirements. - + For local execution, no formatting is needed. 
- + Args: func_name: Function name to format - + Returns: str: Formatted function name (unchanged for local) """ @@ -572,13 +572,13 @@ def format_function_name(func_name: str) -> str: def start_measurements(self, measure_interval: int) -> Optional[str]: """Start memory measurements for function containers. - + Creates a temporary file for storing memory measurements and enables measurement collection at the specified interval. - + Args: measure_interval: Measurement interval in milliseconds - + Returns: Optional[str]: Path to measurement file, or None if measurements disabled """ diff --git a/sebs/local/measureMem.py b/sebs/local/measureMem.py index 9c3ac66b..d6444883 100644 --- a/sebs/local/measureMem.py +++ b/sebs/local/measureMem.py @@ -24,16 +24,16 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> None: """Continuously measure memory consumption of a Docker container. - + Reads memory usage from the container's cgroup filesystem at regular intervals and writes measurements to the specified file. Handles different cgroup paths for compatibility with various Docker configurations. - + Args: container_id: Docker container ID to monitor measure_interval: Measurement interval in milliseconds measurement_file: Path to file for writing measurements - + Note: This function runs indefinitely until the process is terminated. It attempts two different cgroup paths to accommodate different @@ -61,32 +61,21 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> if __name__ == "__main__": """Parse command line arguments and start memory measurement process. - + Command line arguments: --container-id: Docker container ID to monitor --measurement-file: Path to file for writing measurements --measure-interval: Measurement interval in milliseconds """ - parser = argparse.ArgumentParser( - description="Measure memory consumption of a Docker container" - ) + parser = argparse.ArgumentParser(description="Measure memory consumption of a Docker container") parser.add_argument( - "--container-id", - type=str, - required=True, - help="Docker container ID to monitor" + "--container-id", type=str, required=True, help="Docker container ID to monitor" ) parser.add_argument( - "--measurement-file", - type=str, - required=True, - help="Path to file for writing measurements" + "--measurement-file", type=str, required=True, help="Path to file for writing measurements" ) parser.add_argument( - "--measure-interval", - type=int, - required=True, - help="Measurement interval in milliseconds" + "--measure-interval", type=int, required=True, help="Measurement interval in milliseconds" ) args, unknown = parser.parse_known_args() measure(args.container_id, args.measure_interval, args.measurement_file) diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index d16815c8..8ddfa65c 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -24,10 +24,10 @@ class OpenWhiskCredentials(Credentials): """ Manages authentication credentials for OpenWhisk deployments. - + This class handles credential serialization and deserialization for OpenWhisk platforms. Currently implements a minimal credential system. - + Note: OpenWhisk deployments typically rely on local authentication through the wsk CLI tool rather than explicit credential management. 
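The measurement helper is a standalone script; one way to launch it from Python is sketched below. The command-line flags match the argparse definition above, while the container ID, file path, and interval are illustrative values.

```
import subprocess
import sys

# Start the memory sampler as a background process (illustrative values).
proc = subprocess.Popen([
    sys.executable, "sebs/local/measureMem.py",
    "--container-id", "abc123",
    "--measurement-file", "/tmp/memory.csv",
    "--measure-interval", "100",  # milliseconds
])
# ... run the benchmark ...
proc.terminate()  # measure() loops until the process is terminated
```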
@@ -37,12 +37,12 @@ class OpenWhiskCredentials(Credentials): def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Credentials: """ Deserialize OpenWhisk credentials from configuration. - + Args: config: Configuration dictionary containing credential data cache: Cache instance for storing/retrieving cached credentials handlers: Logging handlers for credential operations - + Returns: OpenWhiskCredentials instance (currently empty) """ @@ -51,7 +51,7 @@ def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) def serialize(self) -> Dict[str, Any]: """ Serialize credentials to dictionary format. - + Returns: Empty dictionary as OpenWhisk uses CLI-based authentication """ @@ -61,11 +61,11 @@ def serialize(self) -> Dict[str, Any]: class OpenWhiskResources(SelfHostedResources): """ Manages Docker registry and storage resources for OpenWhisk deployments. - + This class handles configuration of Docker registries, authentication credentials, and storage resources needed for OpenWhisk function deployments. It supports both user-provided and cached configurations. - + Attributes: _docker_registry: Docker registry URL for storing function images _docker_username: Username for Docker registry authentication @@ -83,7 +83,7 @@ def __init__( ) -> None: """ Initialize OpenWhisk resources configuration. - + Args: registry: Docker registry URL for storing function images username: Username for Docker registry authentication @@ -101,7 +101,7 @@ def __init__( def typename() -> str: """ Get the type name for this resource class. - + Returns: String identifier for OpenWhisk resources """ @@ -111,7 +111,7 @@ def typename() -> str: def docker_registry(self) -> Optional[str]: """ Get the Docker registry URL. - + Returns: Docker registry URL or None if not configured """ @@ -121,7 +121,7 @@ def docker_registry(self) -> Optional[str]: def docker_username(self) -> Optional[str]: """ Get the Docker registry username. - + Returns: Docker registry username or None if not configured """ @@ -131,7 +131,7 @@ def docker_username(self) -> Optional[str]: def docker_password(self) -> Optional[str]: """ Get the Docker registry password. - + Returns: Docker registry password or None if not configured """ @@ -141,7 +141,7 @@ def docker_password(self) -> Optional[str]: def storage_updated(self) -> bool: """ Check if storage configuration has been updated. - + Returns: True if storage configuration has been updated, False otherwise """ @@ -151,7 +151,7 @@ def storage_updated(self) -> bool: def registry_updated(self) -> bool: """ Check if registry configuration has been updated. - + Returns: True if registry configuration has been updated, False otherwise """ @@ -161,7 +161,7 @@ def registry_updated(self) -> bool: def initialize(res: Resources, dct: Dict[str, Any]) -> None: """ Initialize OpenWhisk resources from dictionary configuration. - + Args: res: Resources instance to initialize dct: Dictionary containing Docker registry configuration @@ -176,15 +176,15 @@ def initialize(res: Resources, dct: Dict[str, Any]) -> None: def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Resources: """ Deserialize OpenWhisk resources from configuration. - + This method handles both user-provided configuration and cached values, prioritizing user configuration while detecting updates. 
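The precedence described above (user-provided configuration first, cached values as a fallback) can be sketched as follows; `res` and `cached_registry` are hypothetical names, and the real deserialize method additionally records whether the registry settings were updated.

```
# Simplified sketch of the precedence described above, not the actual implementation.
if "docker_registry" in config:
    OpenWhiskResources.initialize(res, config["docker_registry"])  # user configuration wins
elif cached_registry is not None:
    OpenWhiskResources.initialize(res, cached_registry)            # otherwise fall back to the cache
```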
- + Args: config: Configuration dictionary that may contain 'docker_registry' section cache: Cache instance to retrieve/store configuration handlers: Logging handlers for resource operations - + Returns: OpenWhiskResources instance with appropriate configuration """ @@ -233,7 +233,7 @@ def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) def update_cache(self, cache: Cache) -> None: """ Update cache with current resource configuration. - + Args: cache: Cache instance to update with current configuration """ @@ -251,7 +251,7 @@ def update_cache(self, cache: Cache) -> None: def serialize(self) -> Dict[str, Any]: """ Serialize resource configuration to dictionary. - + Returns: Dictionary containing all resource configuration including Docker registry settings and inherited storage configuration @@ -268,11 +268,11 @@ def serialize(self) -> Dict[str, Any]: class OpenWhiskConfig(Config): """ Main configuration class for OpenWhisk deployments. - + This class manages all OpenWhisk-specific configuration settings including cluster management, WSK CLI settings, and experimental features. It integrates with the broader SeBS configuration system. - + Attributes: name: Platform name identifier ('openwhisk') shutdownStorage: Whether to shutdown storage after experiments @@ -284,7 +284,7 @@ class OpenWhiskConfig(Config): _credentials: OpenWhisk credentials configuration _resources: OpenWhisk resources configuration """ - + name: str shutdownStorage: bool removeCluster: bool @@ -296,7 +296,7 @@ class OpenWhiskConfig(Config): def __init__(self, config: Dict[str, Any], cache: Cache) -> None: """ Initialize OpenWhisk configuration. - + Args: config: Configuration dictionary containing OpenWhisk settings cache: Cache instance for configuration persistence @@ -315,7 +315,7 @@ def __init__(self, config: Dict[str, Any], cache: Cache) -> None: def credentials(self) -> OpenWhiskCredentials: """ Get OpenWhisk credentials configuration. - + Returns: OpenWhiskCredentials instance """ @@ -325,7 +325,7 @@ def credentials(self) -> OpenWhiskCredentials: def resources(self) -> OpenWhiskResources: """ Get OpenWhisk resources configuration. - + Returns: OpenWhiskResources instance """ @@ -335,7 +335,7 @@ def resources(self) -> OpenWhiskResources: def initialize(cfg: Config, dct: Dict[str, Any]) -> None: """ Initialize configuration from dictionary (currently no-op). - + Args: cfg: Configuration instance to initialize dct: Dictionary containing initialization data @@ -345,7 +345,7 @@ def initialize(cfg: Config, dct: Dict[str, Any]) -> None: def serialize(self) -> Dict[str, Any]: """ Serialize configuration to dictionary format. - + Returns: Dictionary containing all OpenWhisk configuration settings including credentials and resources @@ -365,12 +365,12 @@ def serialize(self) -> Dict[str, Any]: def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Config: """ Deserialize OpenWhisk configuration from dictionary and cache. - + Args: config: Configuration dictionary containing OpenWhisk settings cache: Cache instance to retrieve cached configuration handlers: Logging handlers for configuration operations - + Returns: OpenWhiskConfig instance with deserialized configuration """ @@ -387,7 +387,7 @@ def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) def update_cache(self, cache: Cache) -> None: """ Update cache with current configuration values. 
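Using only the methods documented above, a configuration round trip through the cache might look like this sketch; `config_dict`, `cache`, and `handlers` are assumed to exist.

```
from sebs.openwhisk.config import OpenWhiskConfig

# Hypothetical round trip; `config_dict`, `cache`, and `handlers` are assumed to exist.
ow_config = OpenWhiskConfig.deserialize(config_dict, cache, handlers)
ow_config.update_cache(cache)   # persist the current values
data = ow_config.serialize()    # includes the credentials and resources sections
```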
- + Args: cache: Cache instance to update with current configuration """ diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index be7fa1b2..5a6ec9c2 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -19,25 +19,25 @@ class OpenWhiskContainer(DockerContainer): """ OpenWhisk-specific Docker container management. - + This class extends the base DockerContainer to provide OpenWhisk-specific functionality for managing Docker images, registries, and container deployment. It handles Docker registry authentication and image URI generation for OpenWhisk function deployments. - + Attributes: config: OpenWhisk configuration containing registry settings - + Example: >>> container = OpenWhiskContainer(sys_config, ow_config, docker_client, True) >>> registry, repo, tag, uri = container.registry_name("benchmark", "python", "3.8", "x86_64") """ - + @staticmethod def name() -> str: """ Get the platform name identifier. - + Returns: Platform name as string """ @@ -47,7 +47,7 @@ def name() -> str: def typename() -> str: """ Get the container type name. - + Returns: Container type name as string """ @@ -62,7 +62,7 @@ def __init__( ) -> None: """ Initialize OpenWhisk container manager. - + Args: system_config: Global SeBS system configuration config: OpenWhisk-specific configuration settings @@ -77,24 +77,24 @@ def registry_name( ) -> Tuple[str, str, str, str]: """ Generate Docker registry information for a benchmark image. - + This method creates the appropriate registry name, repository name, image tag, and complete image URI based on the benchmark parameters and OpenWhisk configuration. It handles both custom registries and Docker Hub. - + Args: benchmark: Name of the benchmark language_name: Programming language (e.g., 'python', 'nodejs') language_version: Language version (e.g., '3.8', '14') architecture: Target architecture (e.g., 'x86_64') - + Returns: Tuple containing: - Registry name (e.g., "my-registry.com" or "Docker Hub") - Full repository name with registry prefix - Image tag - Complete image URI - + Example: >>> registry, repo, tag, uri = container.registry_name("test", "python", "3.8", "x86_64") >>> # Returns: ("Docker Hub", "sebs", "openwhisk-test-python-3.8-x86_64", "sebs:openwhisk-test-python-3.8-x86_64") diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py index 685797f9..8c9b7c38 100644 --- a/sebs/openwhisk/function.py +++ b/sebs/openwhisk/function.py @@ -23,17 +23,17 @@ class OpenWhiskFunctionConfig(FunctionConfig): """ Configuration data class for OpenWhisk functions. - + This class extends the base FunctionConfig to include OpenWhisk-specific configuration parameters such as Docker image information, namespace settings, and storage configurations for both object and NoSQL storage. - + Attributes: docker_image: Docker image URI used for the function deployment namespace: OpenWhisk namespace (default: "_" for default namespace) object_storage: Minio object storage configuration if required nosql_storage: ScyllaDB NoSQL storage configuration if required - + Note: The docker_image attribute should be merged with higher-level image abstraction in future refactoring. @@ -49,10 +49,10 @@ class OpenWhiskFunctionConfig(FunctionConfig): def deserialize(data: Dict[str, Any]) -> OpenWhiskFunctionConfig: """ Deserialize configuration from dictionary data. 
- + Args: data: Dictionary containing serialized configuration data - + Returns: OpenWhiskFunctionConfig instance with deserialized data """ @@ -66,7 +66,7 @@ def deserialize(data: Dict[str, Any]) -> OpenWhiskFunctionConfig: def serialize(self) -> Dict[str, Any]: """ Serialize configuration to dictionary format. - + Returns: Dictionary containing all configuration data """ @@ -76,10 +76,10 @@ def serialize(self) -> Dict[str, Any]: def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: """ Create configuration from benchmark specification. - + Args: benchmark: Benchmark instance containing configuration requirements - + Returns: OpenWhiskFunctionConfig instance initialized from benchmark """ @@ -91,25 +91,25 @@ def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: class OpenWhiskFunction(Function): """ OpenWhisk-specific function implementation for SeBS. - + This class provides OpenWhisk-specific function management including configuration handling, serialization, and trigger management. It integrates with OpenWhisk actions and maintains Docker image information. - + Attributes: _cfg: OpenWhisk-specific function configuration - + Example: >>> config = OpenWhiskFunctionConfig.from_benchmark(benchmark) >>> function = OpenWhiskFunction("test-func", "benchmark-name", "hash123", config) """ - + def __init__( self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig ) -> None: """ Initialize OpenWhisk function. - + Args: name: Function name (OpenWhisk action name) benchmark: Name of the benchmark this function implements @@ -122,7 +122,7 @@ def __init__( def config(self) -> OpenWhiskFunctionConfig: """ Get OpenWhisk-specific function configuration. - + Returns: OpenWhiskFunctionConfig instance with current settings """ @@ -132,7 +132,7 @@ def config(self) -> OpenWhiskFunctionConfig: def typename() -> str: """ Get the type name for this function class. - + Returns: String identifier for OpenWhisk functions """ @@ -141,7 +141,7 @@ def typename() -> str: def serialize(self) -> Dict[str, Any]: """ Serialize function to dictionary format. - + Returns: Dictionary containing function data and OpenWhisk-specific configuration """ @@ -151,14 +151,14 @@ def serialize(self) -> Dict[str, Any]: def deserialize(cached_config: Dict[str, Any]) -> OpenWhiskFunction: """ Deserialize function from cached configuration data. - + Args: cached_config: Dictionary containing cached function configuration and trigger information - + Returns: OpenWhiskFunction instance with deserialized configuration and triggers - + Raises: AssertionError: If unknown trigger type is encountered """ diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index bb9f4be0..b18d1d86 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -35,21 +35,22 @@ class OpenWhisk(System): """ Apache OpenWhisk serverless platform implementation for SeBS. - + This class provides the main integration between SeBS and Apache OpenWhisk, handling function deployment, execution, container management, and resource allocation. It supports both local and remote OpenWhisk deployments with Docker-based function packaging. 
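Building on the class docstring example, a serialize/deserialize round trip should preserve the Docker image and trigger information; a sketch, assuming `function` was created as shown above:

```
from sebs.openwhisk.function import OpenWhiskFunction

# Sketch: `function` is an OpenWhiskFunction created as in the class docstring example.
data = function.serialize()
restored = OpenWhiskFunction.deserialize(data)
assert restored.config.docker_image == function.config.docker_image
```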
- + Attributes: _config: OpenWhisk-specific configuration settings container_client: Docker container client for function packaging logging_handlers: Logging handlers for the OpenWhisk system - + Example: >>> openwhisk = OpenWhisk(sys_config, ow_config, cache, docker_client, handlers) >>> function = openwhisk.create_function(benchmark, "test-func", True, "image:tag") """ + _config: OpenWhiskConfig def __init__( @@ -62,7 +63,7 @@ def __init__( ) -> None: """ Initialize OpenWhisk system with configuration and clients. - + Args: system_config: Global SeBS system configuration config: OpenWhisk-specific configuration settings @@ -98,10 +99,12 @@ def __init__( password=self.config.resources.docker_password, ) - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None) -> None: + def initialize( + self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None + ) -> None: """ Initialize OpenWhisk system resources. - + Args: config: Additional configuration parameters (currently unused) resource_prefix: Optional prefix for resource naming @@ -112,7 +115,7 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] def config(self) -> OpenWhiskConfig: """ Get OpenWhisk configuration. - + Returns: OpenWhisk configuration instance """ @@ -121,7 +124,7 @@ def config(self) -> OpenWhiskConfig: def shutdown(self) -> None: """ Shutdown OpenWhisk system and clean up resources. - + This method stops storage services if configured and optionally removes the OpenWhisk cluster based on configuration settings. """ @@ -137,7 +140,7 @@ def shutdown(self) -> None: def name() -> str: """ Get the platform name identifier. - + Returns: Platform name as string """ @@ -147,7 +150,7 @@ def name() -> str: def typename() -> str: """ Get the platform type name. - + Returns: Platform type name as string """ @@ -157,7 +160,7 @@ def typename() -> str: def function_type() -> "Type[Function]": """ Get the function type for this platform. - + Returns: OpenWhiskFunction class type """ @@ -166,7 +169,7 @@ def function_type() -> "Type[Function]": def get_wsk_cmd(self) -> List[str]: """ Get the WSK CLI command with appropriate flags. - + Returns: List of command arguments for WSK CLI execution """ @@ -187,11 +190,11 @@ def package_code( ) -> Tuple[str, int, str]: """ Package benchmark code for OpenWhisk deployment. - + Creates both a Docker image and a ZIP archive containing the benchmark code. The ZIP archive is required for OpenWhisk function registration even when using Docker-based deployment. - + Args: directory: Path to the benchmark code directory language_name: Programming language (e.g., 'python', 'nodejs') @@ -200,13 +203,13 @@ def package_code( benchmark: Benchmark name is_cached: Whether Docker image is already cached container_deployment: Whether to use container-based deployment - + Returns: Tuple containing: - Path to created ZIP archive - Size of ZIP archive in bytes - Docker image URI - + Raises: RuntimeError: If packaging fails """ @@ -237,13 +240,13 @@ def package_code( def storage_arguments(self, code_package: Benchmark) -> List[str]: """ Generate storage-related arguments for function deployment. - + Creates WSK CLI parameters for Minio object storage and ScyllaDB NoSQL storage configurations based on the benchmark requirements. 
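The helpers above compose into a single wsk invocation; the sketch below uses the standard `wsk action update` subcommand and flags, but the exact arguments SeBS passes are not shown in this hunk, and `func_name`, `memory_mb`, and `timeout_ms` are illustrative names.

```
import subprocess

# Sketch only: compose the CLI prefix, resource settings, and storage parameters.
cmd = [
    *openwhisk.get_wsk_cmd(),                    # e.g. ["wsk", "-i"] when security checks are bypassed
    "action", "update", func_name,
    "--memory", str(memory_mb),                  # MB
    "--timeout", str(timeout_ms),                # milliseconds
    *openwhisk.storage_arguments(code_package),  # "--param <name> <value>" entries for storage
]
subprocess.run(cmd, check=True)
```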
- + Args: code_package: Benchmark configuration requiring storage access - + Returns: List of WSK CLI parameter arguments for storage configuration """ @@ -290,20 +293,20 @@ def create_function( ) -> "OpenWhiskFunction": """ Create or retrieve an OpenWhisk function (action). - + This method checks if a function already exists and updates it if necessary, or creates a new function with the appropriate configuration, storage settings, and Docker image. - + Args: code_package: Benchmark configuration and code package func_name: Name for the OpenWhisk action container_deployment: Whether to use container-based deployment container_uri: URI of the Docker image for the function - + Returns: OpenWhiskFunction instance configured with LibraryTrigger - + Raises: RuntimeError: If WSK CLI is not accessible or function creation fails """ @@ -395,13 +398,13 @@ def update_function( ) -> None: """ Update an existing OpenWhisk function with new code and configuration. - + Args: function: Existing function to update code_package: New benchmark configuration and code package container_deployment: Whether to use container-based deployment container_uri: URI of the new Docker image - + Raises: RuntimeError: If WSK CLI is not accessible or update fails """ @@ -450,14 +453,14 @@ def update_function( def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: """ Update configuration of an existing OpenWhisk function. - + Updates memory allocation, timeout, and storage parameters without changing the function code or Docker image. - + Args: function: Function to update configuration for code_package: New benchmark configuration settings - + Raises: RuntimeError: If WSK CLI is not accessible or configuration update fails """ @@ -491,14 +494,14 @@ def update_function_configuration(self, function: Function, code_package: Benchm def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: """ Check if function configuration has changed compared to cached version. - + Compares current benchmark configuration and storage settings with the cached function configuration to determine if an update is needed. - + Args: cached_function: Previously cached function configuration benchmark: Current benchmark configuration to compare against - + Returns: True if configuration has changed and function needs updating """ @@ -531,11 +534,11 @@ def default_function_name( ) -> str: """ Generate default function name based on benchmark and resource configuration. - + Args: code_package: Benchmark package containing name and language info resources: Optional specific resources to use for naming - + Returns: Generated function name string """ @@ -548,11 +551,11 @@ def default_function_name( def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: """ Enforce cold start for functions (not implemented for OpenWhisk). - + Args: functions: List of functions to enforce cold start for code_package: Benchmark package configuration - + Raises: NotImplementedError: Cold start enforcement not implemented for OpenWhisk """ @@ -568,14 +571,14 @@ def download_metrics( ) -> None: """ Download metrics for function executions (no-op for OpenWhisk). 
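The cached-function path described above reduces to a simple check-and-update flow; a sketch (not the exact control flow in create_function), assuming `openwhisk`, `cached_function`, and `benchmark` are in scope:

```
# Sketch: reuse a cached function, refreshing its configuration only when needed.
if openwhisk.is_configuration_changed(cached_function, benchmark):
    openwhisk.update_function_configuration(cached_function, benchmark)
openwhisk.cached_function(cached_function)  # re-attach logging handlers and the wsk command
```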
- + Args: function_name: Name of the function to download metrics for start_time: Start time for metrics collection (epoch timestamp) end_time: End time for metrics collection (epoch timestamp) requests: Dictionary mapping request IDs to execution results metrics: Dictionary to store downloaded metrics - + Note: OpenWhisk metrics collection is not currently implemented. """ @@ -584,14 +587,14 @@ def download_metrics( def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: """ Create a trigger for function invocation. - + Args: function: Function to create trigger for trigger_type: Type of trigger to create (LIBRARY or HTTP) - + Returns: Created trigger instance - + Raises: RuntimeError: If WSK CLI is not accessible or trigger type not supported """ @@ -623,9 +626,9 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def cached_function(self, function: Function) -> None: """ Configure a cached function with current system settings. - + Updates triggers with current logging handlers and WSK command configuration. - + Args: function: Cached function to configure """ @@ -638,7 +641,7 @@ def cached_function(self, function: Function) -> None: def disable_rich_output(self) -> None: """ Disable rich output formatting for container operations. - + This is useful for non-interactive environments or when plain text output is preferred. """ diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py index 1dd3bb1d..055bb65f 100644 --- a/sebs/openwhisk/triggers.py +++ b/sebs/openwhisk/triggers.py @@ -21,24 +21,24 @@ class LibraryTrigger(Trigger): """ CLI-based trigger for OpenWhisk function invocation. - + This trigger uses the wsk CLI tool to invoke OpenWhisk actions directly, providing synchronous and asynchronous invocation capabilities. It handles parameter passing and result parsing for CLI-based invocations. - + Attributes: fname: Name of the OpenWhisk action to invoke _wsk_cmd: Complete WSK CLI command for function invocation - + Example: >>> trigger = LibraryTrigger("my-function", ["wsk", "-i"]) >>> result = trigger.sync_invoke({"key": "value"}) """ - + def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None) -> None: """ Initialize library trigger for OpenWhisk function. - + Args: fname: Name of the OpenWhisk action to invoke wsk_cmd: Optional WSK CLI command prefix (including flags) @@ -52,7 +52,7 @@ def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None) -> None: def trigger_type() -> "Trigger.TriggerType": """ Get the trigger type identifier. - + Returns: TriggerType.LIBRARY for CLI-based invocation """ @@ -62,10 +62,10 @@ def trigger_type() -> "Trigger.TriggerType": def wsk_cmd(self) -> List[str]: """ Get the complete WSK CLI command for invocation. - + Returns: List of command arguments for WSK CLI invocation - + Raises: AssertionError: If wsk_cmd has not been set """ @@ -76,7 +76,7 @@ def wsk_cmd(self) -> List[str]: def wsk_cmd(self, wsk_cmd: List[str]) -> None: """ Set the WSK CLI command prefix. - + Args: wsk_cmd: WSK CLI command prefix (including any flags) """ @@ -86,13 +86,13 @@ def wsk_cmd(self, wsk_cmd: List[str]) -> None: def get_command(payload: Dict[str, Any]) -> List[str]: """ Convert payload dictionary to WSK CLI parameter arguments. 
- + Args: payload: Dictionary of parameters to pass to the function - + Returns: List of CLI arguments for passing parameters to WSK - + Example: >>> get_command({"key1": "value1", "key2": 42}) ["--param", "key1", '"value1"', "--param", "key2", "42"] @@ -107,10 +107,10 @@ def get_command(payload: Dict[str, Any]) -> List[str]: def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: """ Synchronously invoke the OpenWhisk function via CLI. - + Args: payload: Dictionary of parameters to pass to the function - + Returns: ExecutionResult containing timing information and function output """ @@ -143,10 +143,10 @@ def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: """ Asynchronously invoke the OpenWhisk function via CLI. - + Args: payload: Dictionary of parameters to pass to the function - + Returns: Future object that will contain the ExecutionResult """ @@ -157,7 +157,7 @@ def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: def serialize(self) -> Dict[str, str]: """ Serialize trigger configuration to dictionary. - + Returns: Dictionary containing trigger type and function name """ @@ -167,10 +167,10 @@ def serialize(self) -> Dict[str, str]: def deserialize(obj: Dict[str, str]) -> Trigger: """ Deserialize trigger from configuration dictionary. - + Args: obj: Dictionary containing serialized trigger data - + Returns: LibraryTrigger instance """ @@ -180,7 +180,7 @@ def deserialize(obj: Dict[str, str]) -> Trigger: def typename() -> str: """ Get the trigger type name. - + Returns: String identifier for this trigger type """ @@ -190,24 +190,24 @@ def typename() -> str: class HTTPTrigger(Trigger): """ HTTP-based trigger for OpenWhisk web action invocation. - + This trigger uses HTTP requests to invoke OpenWhisk web actions, providing an alternative to CLI-based invocation. It inherits HTTP invocation capabilities from the base Trigger class. - + Attributes: fname: Name of the OpenWhisk action url: HTTP URL for the web action endpoint - + Example: >>> trigger = HTTPTrigger("my-function", "https://openwhisk.example.com/api/v1/web/guest/default/my-function.json") >>> result = trigger.sync_invoke({"key": "value"}) """ - + def __init__(self, fname: str, url: str) -> None: """ Initialize HTTP trigger for OpenWhisk web action. - + Args: fname: Name of the OpenWhisk action url: HTTP URL for the web action endpoint @@ -220,7 +220,7 @@ def __init__(self, fname: str, url: str) -> None: def typename() -> str: """ Get the trigger type name. - + Returns: String identifier for this trigger type """ @@ -230,7 +230,7 @@ def typename() -> str: def trigger_type() -> Trigger.TriggerType: """ Get the trigger type identifier. - + Returns: TriggerType.HTTP for HTTP-based invocation """ @@ -239,10 +239,10 @@ def trigger_type() -> Trigger.TriggerType: def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: """ Synchronously invoke the OpenWhisk function via HTTP. - + Args: payload: Dictionary of parameters to pass to the function - + Returns: ExecutionResult containing timing information and function output """ @@ -252,10 +252,10 @@ def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: """ Asynchronously invoke the OpenWhisk function via HTTP. 
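A minimal get_command consistent with the doctest earlier in this hunk JSON-encodes each value, so strings arrive quoted and numbers unquoted; a sketch, not necessarily the exact implementation:

```
import json
from typing import Any, Dict, List

def get_command_sketch(payload: Dict[str, Any]) -> List[str]:
    # Matches the documented example: values are JSON-encoded before being passed to wsk.
    params: List[str] = []
    for key, value in payload.items():
        params.extend(["--param", key, json.dumps(value)])
    return params
```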
- + Args: payload: Dictionary of parameters to pass to the function - + Returns: Future object that will contain the ExecutionResult """ @@ -266,7 +266,7 @@ def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: def serialize(self) -> Dict[str, str]: """ Serialize trigger configuration to dictionary. - + Returns: Dictionary containing trigger type, function name, and URL """ @@ -276,10 +276,10 @@ def serialize(self) -> Dict[str, str]: def deserialize(obj: Dict[str, str]) -> Trigger: """ Deserialize trigger from configuration dictionary. - + Args: obj: Dictionary containing serialized trigger data - + Returns: HTTPTrigger instance """ diff --git a/sebs/statistics.py b/sebs/statistics.py index 9598b834..16804eea 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -19,13 +19,13 @@ def basic_stats(times: List[float]) -> BasicStats: """Compute basic statistics for a list of measurement times. - + This function computes the mean, median, standard deviation, and coefficient of variation for a list of measurement times. - + Args: times: List of measurement times - + Returns: A BasicStats named tuple with the computed statistics """ @@ -38,15 +38,15 @@ def basic_stats(times: List[float]) -> BasicStats: def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: """Compute parametric confidence interval using Student's t-distribution. - + This function computes a confidence interval for the mean of the given measurement times using Student's t-distribution. This is a parametric method that assumes the data follows a normal distribution. - + Args: alpha: Confidence level (e.g., 0.95 for 95% confidence) times: List of measurement times - + Returns: A tuple (lower, upper) representing the confidence interval """ @@ -63,8 +63,8 @@ def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: of the data. Reference: - J.-Y. Le Boudec, "Methods for the Estimation of the Accuracy of - Measurements in Computer Performance Evaluation", + J.-Y. Le Boudec, "Methods for the Estimation of the Accuracy of + Measurements in Computer Performance Evaluation", Performance Evaluation Review, 2010 Args: diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py index 969ebac6..6afc41e8 100644 --- a/sebs/storage/__init__.py +++ b/sebs/storage/__init__.py @@ -30,4 +30,4 @@ storage.config = config storage.start() ``` -""" \ No newline at end of file +""" diff --git a/sebs/storage/config.py b/sebs/storage/config.py index a5124e4a..3b4b217e 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -17,7 +17,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Dict, List +from typing import Any, Dict, List from sebs.cache import Cache @@ -25,29 +25,29 @@ @dataclass class PersistentStorageConfig(ABC): """Abstract base class for persistent object storage configuration. - + This class defines the interface that all object storage configurations must implement. It provides methods for serialization and environment variable generation that are used for caching and runtime configuration. - + Subclasses must implement: - serialize(): Convert configuration to dictionary for caching - envs(): Generate environment variables for benchmark runtime """ - + @abstractmethod - def serialize(self) -> Dict[str, any]: + def serialize(self) -> Dict[str, Any]: """Serialize the configuration to a dictionary. 
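For the parametric interval described in ci_tstudents, the standard SciPy recipe looks like the sketch below; it mirrors the documented behaviour but is not necessarily the exact implementation.

```
import numpy as np
import scipy.stats as st

def t_confidence_interval(alpha, times):
    """Student's t confidence interval for the mean (sketch), e.g. alpha=0.95."""
    arr = np.asarray(times, dtype=float)
    mean = float(np.mean(arr))
    sem = st.sem(arr)  # standard error of the mean
    return st.t.interval(alpha, df=len(arr) - 1, loc=mean, scale=sem)
```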
- + Returns: - Dict[str, any]: Serialized configuration data suitable for JSON storage + Dict[str, Any]: Serialized configuration data suitable for JSON storage """ pass @abstractmethod def envs(self) -> Dict[str, str]: """Generate environment variables for the storage configuration. - + Returns: Dict[str, str]: Environment variables to be set in benchmark runtime """ @@ -57,11 +57,11 @@ def envs(self) -> Dict[str, str]: @dataclass class MinioConfig(PersistentStorageConfig): """Configuration for MinIO S3-compatible object storage. - + MinIO provides a local S3-compatible object storage service that runs in a Docker container. This configuration class stores all the necessary parameters for deploying and connecting to a MinIO instance. - + Attributes: address: Network address where MinIO is accessible (auto-detected) mapped_port: Host port mapped to MinIO's internal port 9000 @@ -74,6 +74,7 @@ class MinioConfig(PersistentStorageConfig): data_volume: Host directory path for persistent data storage type: Storage type identifier (always "minio") """ + address: str = "" mapped_port: int = -1 access_key: str = "" @@ -87,11 +88,11 @@ class MinioConfig(PersistentStorageConfig): def update_cache(self, path: List[str], cache: Cache) -> None: """Update the cache with this configuration's values. - + Stores all configuration fields in the cache using the specified path as a prefix. This allows the configuration to be restored later from the cache. - + Args: path: Cache key path prefix for this configuration cache: Cache instance to store configuration in @@ -102,16 +103,16 @@ def update_cache(self, path: List[str], cache: Cache) -> None: cache.update_config(val=getattr(self, key), keys=[*path, key]) @staticmethod - def deserialize(data: Dict[str, any]) -> "MinioConfig": + def deserialize(data: Dict[str, Any]) -> "MinioConfig": """Deserialize configuration from a dictionary. - + Creates a new MinioConfig instance from dictionary data, typically loaded from cache or configuration files. Only known configuration fields are used, unknown fields are ignored. - + Args: data: Dictionary containing configuration data - + Returns: MinioConfig: New configuration instance """ @@ -122,20 +123,20 @@ def deserialize(data: Dict[str, any]) -> "MinioConfig": return cfg - def serialize(self) -> Dict[str, any]: + def serialize(self) -> Dict[str, Any]: """Serialize the configuration to a dictionary. - + Returns: - Dict[str, any]: All configuration fields as a dictionary + Dict[str, Any]: All configuration fields as a dictionary """ return self.__dict__ def envs(self) -> Dict[str, str]: """Generate environment variables for MinIO configuration. - + Creates environment variables that can be used by benchmark functions to connect to the MinIO storage instance. - + Returns: Dict[str, str]: Environment variables for MinIO connection """ @@ -149,21 +150,21 @@ def envs(self) -> Dict[str, str]: @dataclass class NoSQLStorageConfig(ABC): """Abstract base class for NoSQL database storage configuration. - + This class defines the interface that all NoSQL storage configurations must implement. It provides serialization methods used for caching and configuration management. - + Subclasses must implement: - serialize(): Convert configuration to dictionary for caching """ - + @abstractmethod - def serialize(self) -> Dict[str, any]: + def serialize(self) -> Dict[str, Any]: """Serialize the configuration to a dictionary. 
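The "unknown fields are ignored" behaviour described for MinioConfig.deserialize (and for ScyllaDBConfig below) can be expressed with dataclasses.fields; a sketch, not the verbatim implementation:

```
from dataclasses import fields
from typing import Any, Dict

from sebs.storage.config import MinioConfig

def deserialize_sketch(data: Dict[str, Any]) -> MinioConfig:
    # Keep only keys that correspond to declared dataclass fields; ignore the rest.
    known = {f.name for f in fields(MinioConfig)}
    cfg = MinioConfig()
    for key, value in data.items():
        if key in known:
            setattr(cfg, key, value)
    return cfg
```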
- + Returns: - Dict[str, any]: Serialized configuration data suitable for JSON storage + Dict[str, Any]: Serialized configuration data suitable for JSON storage """ pass @@ -171,11 +172,11 @@ def serialize(self) -> Dict[str, any]: @dataclass class ScyllaDBConfig(NoSQLStorageConfig): """Configuration for ScyllaDB DynamoDB-compatible NoSQL storage. - + ScyllaDB provides a high-performance NoSQL database with DynamoDB-compatible API through its Alternator interface. This configuration class stores all the necessary parameters for deploying and connecting to a ScyllaDB instance. - + Attributes: address: Network address where ScyllaDB is accessible (auto-detected) mapped_port: Host port mapped to ScyllaDB's Alternator port @@ -189,6 +190,7 @@ class ScyllaDBConfig(NoSQLStorageConfig): version: ScyllaDB Docker image version to use data_volume: Host directory path for persistent data storage """ + address: str = "" mapped_port: int = -1 alternator_port: int = 8000 @@ -203,11 +205,11 @@ class ScyllaDBConfig(NoSQLStorageConfig): def update_cache(self, path: List[str], cache: Cache) -> None: """Update the cache with this configuration's values. - + Stores all configuration fields in the cache using the specified path as a prefix. This allows the configuration to be restored later from the cache. - + Args: path: Cache key path prefix for this configuration cache: Cache instance to store configuration in @@ -216,16 +218,16 @@ def update_cache(self, path: List[str], cache: Cache) -> None: cache.update_config(val=getattr(self, key), keys=[*path, key]) @staticmethod - def deserialize(data: Dict[str, any]) -> "ScyllaDBConfig": + def deserialize(data: Dict[str, Any]) -> "ScyllaDBConfig": """Deserialize configuration from a dictionary. - + Creates a new ScyllaDBConfig instance from dictionary data, typically loaded from cache or configuration files. Only known configuration fields are used, unknown fields are ignored. - + Args: data: Dictionary containing configuration data - + Returns: ScyllaDBConfig: New configuration instance """ @@ -236,10 +238,10 @@ def deserialize(data: Dict[str, any]) -> "ScyllaDBConfig": return cfg - def serialize(self) -> Dict[str, any]: + def serialize(self) -> Dict[str, Any]: """Serialize the configuration to a dictionary. - + Returns: - Dict[str, any]: All configuration fields as a dictionary + Dict[str, Any]: All configuration fields as a dictionary """ return self.__dict__ diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index 76b2413f..ace127ac 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -28,21 +28,21 @@ class Minio(PersistentStorage): """ S3-compatible object storage implementation using MinIO. - + This class manages a MinIO storage instance running in a Docker container, providing S3-compatible object storage for local benchmarking. It handles bucket creation, file uploads/downloads, and container lifecycle management. - + Attributes: config: MinIO configuration settings connection: MinIO client connection """ - + @staticmethod def typename() -> str: """ Get the qualified type name of this class. - + Returns: str: Full type name including deployment name """ @@ -52,7 +52,7 @@ def typename() -> str: def deployment_name() -> str: """ Get the deployment platform name. - + Returns: str: Deployment name ('minio') """ @@ -70,7 +70,7 @@ def __init__( ): """ Initialize a MinIO storage instance. 
- + Args: docker_client: Docker client for managing the MinIO container cache_client: Cache client for storing storage configuration @@ -86,7 +86,7 @@ def __init__( def config(self) -> MinioConfig: """ Get the MinIO configuration. - + Returns: MinioConfig: The configuration object """ @@ -96,7 +96,7 @@ def config(self) -> MinioConfig: def config(self, config: MinioConfig): """ Set the MinIO configuration. - + Args: config: New configuration object """ @@ -106,11 +106,11 @@ def config(self, config: MinioConfig): def _define_http_client() -> Any: """ Configure HTTP client for MinIO with appropriate timeouts and retries. - + MinIO does not provide a direct way to configure connection timeouts, so we need to create a custom HTTP client with proper timeout settings. The rest of configuration follows MinIO's default client settings. - + Returns: urllib3.PoolManager: Configured HTTP client for MinIO """ @@ -130,12 +130,12 @@ def _define_http_client() -> Any: def start(self) -> None: """ Start a MinIO storage container. - + Creates and runs a Docker container with MinIO, configuring it with random credentials and mounting a volume for persistent storage. The container runs in detached mode and is accessible via the configured port. - + Raises: RuntimeError: If starting the MinIO container fails """ @@ -161,7 +161,7 @@ def start(self) -> None: self._cfg.address = "" self.logging.info("Minio storage ACCESS_KEY={}".format(self._cfg.access_key)) self.logging.info("Minio storage SECRET_KEY={}".format(self._cfg.secret_key)) - + try: self.logging.info(f"Starting storage Minio on port {self._cfg.mapped_port}") # Run the MinIO container @@ -192,11 +192,11 @@ def start(self) -> None: def configure_connection(self) -> None: """ Configure the connection to the MinIO container. - + Determines the appropriate address to connect to the MinIO container based on the host platform. For Linux, it uses the container's IP address, while for Windows, macOS, or WSL it uses localhost with the mapped port. - + Raises: RuntimeError: If the MinIO container is not available or if the IP address cannot be detected @@ -234,14 +234,14 @@ def configure_connection(self) -> None: f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" ) self.logging.info("Starting minio instance at {}".format(self._cfg.address)) - + # Create the connection using the configured address self.connection = self.get_connection() def stop(self) -> None: """ Stop the MinIO container. - + Gracefully stops the running MinIO container if it exists. Logs an error if the container is not known. """ @@ -255,10 +255,10 @@ def stop(self) -> None: def get_connection(self) -> minio.Minio: """ Create a new MinIO client connection. - + Creates a connection to the MinIO server using the configured address, credentials, and HTTP client settings. - + Returns: minio.Minio: Configured MinIO client """ @@ -270,21 +270,23 @@ def get_connection(self) -> minio.Minio: http_client=Minio._define_http_client(), ) - def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False) -> str: + def _create_bucket( + self, name: str, buckets: List[str] = [], randomize_name: bool = False + ) -> str: """ Create a new bucket if it doesn't already exist. - + Checks if a bucket with the given name already exists in the list of buckets. If not, creates a new bucket with either the exact name or a randomized name. 
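_define_http_client wraps MinIO's defaults with explicit timeouts and retries; a self-contained sketch using urllib3 is shown below, with illustrative timeout and retry values. The resulting object is what gets passed as `http_client` to `minio.Minio` in get_connection above.

```
import urllib3

def define_http_client_sketch() -> urllib3.PoolManager:
    # Explicit connect/read timeouts and bounded retries; the values are illustrative.
    return urllib3.PoolManager(
        timeout=urllib3.Timeout(connect=5.0, read=10.0),
        retries=urllib3.Retry(total=5, backoff_factor=0.2),
    )
```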
- + Args: name: Base name for the bucket buckets: List of existing bucket names to check against randomize_name: Whether to append a random UUID to the bucket name - + Returns: str: Name of the existing or newly created bucket - + Raises: minio.error.ResponseError: If bucket creation fails """ @@ -295,13 +297,13 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name - + # MinIO has limit of bucket name to 16 characters if randomize_name: bucket_name = "{}-{}".format(name, str(uuid.uuid4())[0:16]) else: bucket_name = name - + try: self.connection.make_bucket(bucket_name, location=self.MINIO_REGION) self.logging.info("Created bucket {}".format(bucket_name)) @@ -318,15 +320,15 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo def uploader_func(self, path_idx: int, file: str, filepath: str) -> None: """ Upload a file to the MinIO storage. - + Uploads a file to the specified input prefix in the benchmarks bucket. This function is passed to benchmarks for uploading their input data. - + Args: path_idx: Index of the input prefix to use file: Name of the file within the bucket filepath: Local path to the file to upload - + Raises: minio.error.ResponseError: If the upload fails """ @@ -338,61 +340,62 @@ def uploader_func(self, path_idx: int, file: str, filepath: str) -> None: self.logging.error("Upload failed!") raise err - def clean(self) -> None: - """ - Clean all objects from output buckets. - - Removes all objects from the output buckets to prepare for a new - benchmark run. Logs any errors that occur during deletion. - """ - for bucket in self.output_buckets: - objects = self.connection.list_objects_v2(bucket) - objects = [obj.object_name for obj in objects] - for err in self.connection.remove_objects(bucket, objects): - self.logging.error("Deletion Error: {}".format(err)) - - def download_results(self, result_dir: str) -> None: - """ - Download all objects from output buckets to a local directory. - - Downloads benchmark results from all output buckets to a subdirectory - named 'storage_output' within the specified result directory. - - Args: - result_dir: Base directory to store downloaded results - """ - result_dir = os.path.join(result_dir, "storage_output") - for bucket in self.output_buckets: - objects = self.connection.list_objects_v2(bucket) - objects = [obj.object_name for obj in objects] - for obj in objects: - self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj)) - - def clean_bucket(self, bucket: str) -> None: + # FIXME: is still even used anywhere? + # def clean(self) -> None: + # """ + # Clean all objects from output buckets. + + # Removes all objects from the output buckets to prepare for a new + # benchmark run. Logs any errors that occur during deletion. + # """ + # for bucket in self.output_buckets: + # objects = self.connection.list_objects_v2(bucket) + # objects = [obj.object_name for obj in objects] + # for err in self.connection.remove_objects(bucket, objects): + # self.logging.error("Deletion Error: {}".format(err)) + # + # def download_results(self, result_dir: str) -> None: + # """ + # Download all objects from output buckets to a local directory. + + # Downloads benchmark results from all output buckets to a subdirectory + # named 'storage_output' within the specified result directory. 
+ + # Args: + # result_dir: Base directory to store downloaded results + # """ + # result_dir = os.path.join(result_dir, "storage_output") + # for bucket in self.output_buckets: + # objects = self.connection.list_objects_v2(bucket) + # objects = [obj.object_name for obj in objects] + # for obj in objects: + # self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj)) + + def clean_bucket(self, bucket_name: str) -> None: """ Remove all objects from a bucket. - + Deletes all objects within the specified bucket but keeps the bucket itself. Logs any errors that occur during object deletion. - + Args: bucket: Name of the bucket to clean """ delete_object_list = map( lambda x: minio.DeleteObject(x.object_name), - self.connection.list_objects(bucket_name=bucket), + self.connection.list_objects(bucket_name=bucket_name), ) - errors = self.connection.remove_objects(bucket, delete_object_list) + errors = self.connection.remove_objects(bucket_name, delete_object_list) for error in errors: - self.logging.error(f"Error when deleting object from bucket {bucket}: {error}!") + self.logging.error(f"Error when deleting object from bucket {bucket_name}: {error}!") def remove_bucket(self, bucket: str) -> None: """ Delete a bucket completely. - + Removes the specified bucket from the MinIO storage. The bucket must be empty before it can be deleted. - + Args: bucket: Name of the bucket to remove """ @@ -401,13 +404,13 @@ def remove_bucket(self, bucket: str) -> None: def correct_name(self, name: str) -> str: """ Format a bucket name to comply with MinIO naming requirements. - + For MinIO, no name correction is needed (unlike some cloud providers that enforce additional restrictions). - + Args: name: Original bucket name - + Returns: str: Bucket name (unchanged for MinIO) """ @@ -416,9 +419,9 @@ def correct_name(self, name: str) -> str: def download(self, bucket_name: str, key: str, filepath: str) -> None: """ Download an object from a bucket to a local file. - + Not implemented for this class. Use fget_object directly or other methods. - + Raises: NotImplementedError: This method is not implemented """ @@ -427,10 +430,10 @@ def download(self, bucket_name: str, key: str, filepath: str) -> None: def exists_bucket(self, bucket_name: str) -> bool: """ Check if a bucket exists. - + Args: bucket_name: Name of the bucket to check - + Returns: bool: True if the bucket exists, False otherwise """ @@ -439,14 +442,14 @@ def exists_bucket(self, bucket_name: str) -> bool: def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: """ List all objects in a bucket with an optional prefix filter. - + Args: bucket_name: Name of the bucket to list prefix: Optional prefix to filter objects - + Returns: List[str]: List of object names in the bucket - + Raises: RuntimeError: If the bucket does not exist """ @@ -459,10 +462,10 @@ def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: """ List all buckets, optionally filtered by name. - + Args: bucket_name: Optional filter for bucket names - + Returns: List[str]: List of bucket names """ @@ -475,9 +478,9 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: def upload(self, bucket_name: str, filepath: str, key: str) -> None: """ Upload a file to a bucket. - + Not implemented for this class. Use fput_object directly or uploader_func. 
- + Raises: NotImplementedError: This method is not implemented """ @@ -486,7 +489,7 @@ def upload(self, bucket_name: str, filepath: str, key: str) -> None: def serialize(self) -> Dict[str, Any]: """ Serialize MinIO configuration to a dictionary. - + Returns: dict: Serialized configuration data """ @@ -512,27 +515,27 @@ def _deserialize( ) -> T: """ Deserialize a MinIO instance from cached configuration with custom type. - + Creates a new instance of the specified class type from cached configuration data. This allows platform-specific versions to be deserialized correctly while sharing the core implementation. - + Args: cached_config: Cached MinIO configuration cache_client: Cache client resources: Resources configuration obj_type: Type of object to create (a Minio subclass) - + Returns: T: Deserialized instance of the specified type - + Raises: RuntimeError: If the storage container does not exist """ docker_client = docker.from_env() obj = obj_type(docker_client, cache_client, resources, False) obj._cfg = cached_config - + # Try to reconnect to existing container if ID is available if cached_config.instance_id: instance_id = cached_config.instance_id @@ -542,11 +545,11 @@ def _deserialize( raise RuntimeError(f"Storage container {instance_id} does not exist!") else: obj._storage_container = None - + # Copy bucket information obj._input_prefixes = copy.copy(cached_config.input_buckets) obj._output_prefixes = copy.copy(cached_config.output_buckets) - + # Set up connection obj.configure_connection() return obj @@ -555,14 +558,14 @@ def _deserialize( def deserialize(cached_config: MinioConfig, cache_client: Cache, res: Resources) -> "Minio": """ Deserialize a MinIO instance from cached configuration. - + Creates a new Minio instance from cached configuration data. - + Args: cached_config: Cached MinIO configuration cache_client: Cache client res: Resources configuration - + Returns: Minio: Deserialized Minio instance """ diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index d4b4b3fd..dea7145a 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -38,17 +38,17 @@ class SelfHostedResources(Resources): """Resource configuration for self-hosted storage deployments. - + This class manages configuration for self-hosted storage services, including object storage (MinIO) and NoSQL storage (ScyllaDB). It provides serialization, caching, and deserialization capabilities for storage configurations. - + Attributes: _object_storage: Configuration for object storage (MinIO) _nosql_storage: Configuration for NoSQL storage (ScyllaDB) """ - + def __init__( self, name: str, @@ -56,7 +56,7 @@ def __init__( nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): """Initialize self-hosted resources configuration. - + Args: name: Name of the deployment/resource group storage_cfg: Configuration for object storage service @@ -69,7 +69,7 @@ def __init__( @property def storage_config(self) -> Optional[PersistentStorageConfig]: """Get the object storage configuration. - + Returns: Optional[PersistentStorageConfig]: Object storage configuration or None """ @@ -78,7 +78,7 @@ def storage_config(self) -> Optional[PersistentStorageConfig]: @property def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: """Get the NoSQL storage configuration. 
- + Returns: Optional[NoSQLStorageConfig]: NoSQL storage configuration or None """ @@ -86,7 +86,7 @@ def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: def serialize(self) -> Dict[str, any]: """Serialize the resource configuration to a dictionary. - + Returns: Dict[str, any]: Serialized configuration containing storage and/or nosql sections """ @@ -102,10 +102,10 @@ def serialize(self) -> Dict[str, any]: def update_cache(self, cache: Cache) -> None: """Update the configuration cache with current resource settings. - + Stores both object storage and NoSQL storage configurations in the cache for later retrieval. - + Args: cache: Cache instance to store configurations in """ @@ -123,15 +123,15 @@ def _deserialize_storage( self, config: Dict[str, any], cached_config: Optional[Dict[str, any]], storage_type: str ) -> Tuple[str, Dict[str, any]]: """Deserialize storage configuration from config or cache. - + Attempts to load storage configuration from the provided config first, then falls back to cached configuration if available. - + Args: config: Current configuration dictionary cached_config: Previously cached configuration dictionary storage_type: Type of storage to deserialize ('object' or 'nosql') - + Returns: Tuple[str, Dict[str, any]]: Storage implementation name and configuration """ @@ -164,12 +164,14 @@ def _deserialize_storage( return storage_impl, storage_config @staticmethod - def _deserialize(ret: "SelfHostedResources", config: Dict[str, any], cached_config: Dict[str, any]) -> None: + def _deserialize( + ret: "SelfHostedResources", config: Dict[str, any], cached_config: Dict[str, any] + ) -> None: """Deserialize storage configurations from config and cache data. - + Populates the SelfHostedResources instance with storage configurations loaded from the provided configuration and cached data. - + Args: ret: SelfHostedResources instance to populate config: Current configuration dictionary @@ -202,19 +204,19 @@ def _deserialize(ret: "SelfHostedResources", config: Dict[str, any], cached_conf class SelfHostedSystemResources(SystemResources): """System-level resource management for self-hosted storage deployments. - + This class manages the lifecycle and provisioning of self-hosted storage services, including MinIO object storage and ScyllaDB NoSQL storage. It handles container management, service initialization, and provides unified access to storage services. - + Attributes: _name: Name of the deployment _logging_handlers: Logging configuration handlers _storage: Active persistent storage instance (MinIO) _nosql_storage: Active NoSQL storage instance (ScyllaDB) """ - + def __init__( self, name: str, @@ -224,7 +226,7 @@ def __init__( logger_handlers: LoggingHandlers, ): """Initialize system resources for self-hosted storage. - + Args: name: Name of the deployment config: SeBS configuration object @@ -239,20 +241,19 @@ def __init__( self._storage: Optional[PersistentStorage] = None self._nosql_storage: Optional[NoSQLStorage] = None - def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: """Get or create a persistent storage instance. - + Creates a MinIO storage instance if one doesn't exist, or returns the existing instance. The storage is configured using the deployment's storage configuration. 
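Tying the pieces together, a usage sketch for the system resources described above; the deployment name and all constructor arguments are assumed to exist in the caller's scope.

```
from sebs.storage.resources import SelfHostedSystemResources

# Hypothetical usage; `config`, `cache`, `docker_client`, and `handlers` are assumed to exist.
resources = SelfHostedSystemResources("openwhisk", config, cache, docker_client, handlers)
storage = resources.get_storage(replace_existing=False)  # MinIO-backed object storage
nosql = resources.get_nosql_storage()                    # ScyllaDB-backed NoSQL storage
```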
- + Args: replace_existing: Whether to replace existing buckets (optional) - + Returns: PersistentStorage: MinIO storage instance - + Raises: RuntimeError: If storage configuration is missing or unsupported """ @@ -285,14 +286,14 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor def get_nosql_storage(self) -> NoSQLStorage: """Get or create a NoSQL storage instance. - + Creates a ScyllaDB storage instance if one doesn't exist, or returns the existing instance. The storage is configured using the deployment's NoSQL storage configuration. - + Returns: NoSQLStorage: ScyllaDB storage instance - + Raises: RuntimeError: If NoSQL storage configuration is missing or unsupported """ diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index 4e760c84..c299fc45 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -31,12 +31,12 @@ class ScyllaDB(NoSQLStorage): """ScyllaDB implementation for DynamoDB-compatible NoSQL storage. - + This class manages a ScyllaDB instance running in a Docker container, providing DynamoDB-compatible NoSQL storage through ScyllaDB's Alternator interface. It handles table creation, data operations, and container lifecycle management. - + Attributes: _docker_client: Docker client for container management _storage_container: Docker container running ScyllaDB @@ -45,11 +45,11 @@ class ScyllaDB(NoSQLStorage): _serializer: DynamoDB type serializer for data conversion client: Boto3 DynamoDB client configured for ScyllaDB """ - + @staticmethod def typename() -> str: """Get the qualified type name of this class. - + Returns: str: Full type name including deployment name """ @@ -58,7 +58,7 @@ def typename() -> str: @staticmethod def deployment_name() -> str: """Get the deployment platform name. - + Returns: str: Deployment name ('scylladb') """ @@ -67,7 +67,7 @@ def deployment_name() -> str: @property def config(self) -> ScyllaDBConfig: """Get the ScyllaDB configuration. - + Returns: ScyllaDBConfig: The configuration object """ @@ -84,7 +84,7 @@ def __init__( resources: Optional[Resources] = None, ): """Initialize a ScyllaDB storage instance. - + Args: docker_client: Docker client for managing the ScyllaDB container cache_client: Cache client for storing storage configuration @@ -111,14 +111,14 @@ def __init__( def start(self) -> None: """Start a ScyllaDB storage container. - + Creates and runs a Docker container with ScyllaDB, configuring it with the specified CPU and memory resources. The container runs in detached mode and exposes the Alternator DynamoDB-compatible API on the configured port. - + The method waits for ScyllaDB to fully initialize by checking the nodetool status until the service is ready. - + Raises: RuntimeError: If starting the ScyllaDB container fails or if ScyllaDB fails to initialize within the timeout period @@ -189,14 +189,14 @@ def start(self) -> None: def configure_connection(self) -> None: """Configure the connection to the ScyllaDB container. - + Determines the appropriate address to connect to the ScyllaDB container based on the host platform. For Linux, it uses the container's IP address, while for Windows, macOS, or WSL it uses localhost with the mapped port. - + Creates a boto3 DynamoDB client configured to connect to ScyllaDB's Alternator interface. 
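# A minimal, standalone sketch of the connection approach that configure_connection
# describes above: a boto3 DynamoDB client pointed at ScyllaDB's Alternator endpoint.
# The address, port, and dummy credentials are illustrative assumptions; Alternator
# does not validate the AWS region or keys.
import boto3

client = boto3.client(
    "dynamodb",
    region_name="us-east-1",
    aws_access_key_id="None",
    aws_secret_access_key="None",
    endpoint_url="http://127.0.0.1:8000",  # assumed Alternator address:port
)
# Requires a running Alternator endpoint:
print(client.list_tables()["TableNames"])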
- + Raises: RuntimeError: If the ScyllaDB container is not available or if the IP address cannot be detected @@ -229,7 +229,7 @@ def configure_connection(self) -> None: f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" ) self.logging.info("Starting ScyllaDB instance at {}".format(self._cfg.address)) - + # Create the DynamoDB client for ScyllaDB's Alternator interface self.client = boto3.client( "dynamodb", @@ -241,7 +241,7 @@ def configure_connection(self) -> None: def stop(self) -> None: """Stop the ScyllaDB container. - + Gracefully stops the running ScyllaDB container if it exists. Logs an error if the container is not known. """ @@ -254,10 +254,10 @@ def stop(self) -> None: def envs(self) -> Dict[str, str]: """Generate environment variables for ScyllaDB configuration. - + Creates environment variables that can be used by benchmark functions to connect to the ScyllaDB storage instance. - + Returns: Dict[str, str]: Environment variables for ScyllaDB connection """ @@ -265,7 +265,7 @@ def envs(self) -> Dict[str, str]: def serialize(self) -> Tuple[StorageType, Dict[str, Any]]: """Serialize ScyllaDB configuration to a tuple. - + Returns: Tuple[StorageType, Dict[str, Any]]: Storage type and serialized configuration """ @@ -285,20 +285,20 @@ def _deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources, obj_type: Type[T] ) -> T: """Deserialize a ScyllaDB instance from cached configuration with custom type. - + Creates a new instance of the specified class type from cached configuration data. This allows platform-specific versions to be deserialized correctly while sharing the core implementation. - + Args: cached_config: Cached ScyllaDB configuration cache_client: Cache client resources: Resources configuration obj_type: Type of object to create (a ScyllaDB subclass) - + Returns: T: Deserialized instance of the specified type - + Raises: RuntimeError: If the storage container does not exist """ @@ -320,14 +320,14 @@ def deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources ) -> "ScyllaDB": """Deserialize a ScyllaDB instance from cached configuration. - + Creates a new ScyllaDB instance from cached configuration data. - + Args: cached_config: Cached ScyllaDB configuration cache_client: Cache client resources: Resources configuration - + Returns: ScyllaDB: Deserialized ScyllaDB instance """ @@ -335,13 +335,13 @@ def deserialize( def retrieve_cache(self, benchmark: str) -> bool: """Retrieve cached table configuration for a benchmark. - + Checks if table configuration for the given benchmark is already loaded in memory, and if not, attempts to load it from the cache. - + Args: benchmark: Name of the benchmark - + Returns: bool: True if table configuration was found, False otherwise """ @@ -357,10 +357,10 @@ def retrieve_cache(self, benchmark: str) -> bool: def update_cache(self, benchmark: str) -> None: """Update the cache with table configuration for a benchmark. - + Stores the table configuration for the specified benchmark in the cache for future retrieval. - + Args: benchmark: Name of the benchmark """ @@ -374,10 +374,10 @@ def update_cache(self, benchmark: str) -> None: def get_tables(self, benchmark: str) -> Dict[str, str]: """Get the table name mappings for a benchmark. 
- + Args: benchmark: Name of the benchmark - + Returns: Dict[str, str]: Mapping from original table names to actual table names """ @@ -385,11 +385,11 @@ def get_tables(self, benchmark: str) -> Dict[str, str]: def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: """Get the actual table name for a benchmark's logical table name. - + Args: benchmark: Name of the benchmark table: Logical table name - + Returns: Optional[str]: Actual table name or None if not found """ @@ -410,17 +410,17 @@ def write_to_table( secondary_key: Optional[Tuple[str, str]] = None, ) -> None: """Write data to a DynamoDB table in ScyllaDB. - + Serializes the data using DynamoDB type serialization and writes it to the specified table with the provided primary and optional secondary keys. - + Args: benchmark: Name of the benchmark table: Logical table name data: Data to write to the table primary_key: Tuple of (key_name, key_value) for the primary key secondary_key: Optional tuple of (key_name, key_value) for the secondary key - + Raises: AssertionError: If the table name is not found """ @@ -434,28 +434,27 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: """Create a DynamoDB table in ScyllaDB. - + Creates a new DynamoDB table with the specified primary key and optional secondary key. The table name is constructed to be unique across benchmarks and resource groups. - + Note: Unlike cloud providers with hierarchical database structures, ScyllaDB requires unique table names at the cluster level. - + Args: benchmark: Name of the benchmark name: Logical table name primary_key: Name of the primary key attribute secondary_key: Optional name of the secondary key attribute - + Returns: str: The actual table name that was created - + Raises: RuntimeError: If table creation fails for unknown reasons """ @@ -498,13 +497,13 @@ def create_table( def clear_table(self, name: str) -> str: """Clear all data from a table. - + Args: name: Name of the table to clear - + Returns: str: Table name - + Raises: NotImplementedError: This method is not yet implemented """ @@ -512,13 +511,13 @@ def clear_table(self, name: str) -> str: def remove_table(self, name: str) -> str: """Remove a table completely. - + Args: name: Name of the table to remove - + Returns: str: Table name - + Raises: NotImplementedError: This method is not yet implemented """ diff --git a/sebs/types.py b/sebs/types.py index 988efbd0..617c0d11 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -10,29 +10,31 @@ class BenchmarkModule(str, Enum): """Types of benchmark modules. - + This enum defines the different types of benchmark modules that can be used by benchmark functions: - + - STORAGE: Object storage module for storing and retrieving files - NOSQL: NoSQL database module for storing and retrieving structured data """ + STORAGE = "storage" NOSQL = "nosql" class Platforms(str, Enum): """Supported serverless platforms. 
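# A small sketch of the write path described in write_to_table above: serialize a
# Python dict into DynamoDB's attribute format, then call put_item. The table name,
# item fields, and endpoint are illustrative assumptions, not values from the patch.
import boto3
from boto3.dynamodb.types import TypeSerializer

serializer = TypeSerializer()
data = {"user_id": "alice", "visits": 3, "tags": ["a", "b"]}
item = {k: serializer.serialize(v) for k, v in data.items()}

client = boto3.client(
    "dynamodb",
    region_name="us-east-1",
    aws_access_key_id="None",
    aws_secret_access_key="None",
    endpoint_url="http://127.0.0.1:8000",
)
# Requires an existing table on a running DynamoDB-compatible endpoint:
client.put_item(TableName="sebs-benchmarks-example-users", Item=item)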
- + This enum defines the different serverless platforms supported by the benchmarking suite: - + - AWS: Amazon Web Services Lambda - AZURE: Microsoft Azure Functions - GCP: Google Cloud Platform Cloud Functions - LOCAL: Local execution environment - OPENWHISK: Apache OpenWhisk """ + AWS = "aws" AZURE = "azure" GCP = "gcp" @@ -42,15 +44,16 @@ class Platforms(str, Enum): class Storage(str, Enum): """Supported object storage services. - + This enum defines the different object storage services supported by the benchmarking suite: - + - AWS_S3: Amazon Simple Storage Service (S3) - AZURE_BLOB_STORAGE: Microsoft Azure Blob Storage - GCP_STORAGE: Google Cloud Storage - MINIO: MinIO object storage (local or self-hosted) """ + AWS_S3 = "aws-s3" AZURE_BLOB_STORAGE = "azure-blob-storage" GCP_STORAGE = "google-cloud-storage" @@ -59,15 +62,16 @@ class Storage(str, Enum): class NoSQLStorage(str, Enum): """Supported NoSQL database services. - + This enum defines the different NoSQL database services supported by the benchmarking suite: - + - AWS_DYNAMODB: Amazon DynamoDB - AZURE_COSMOSDB: Microsoft Azure Cosmos DB - GCP_DATASTORE: Google Cloud Datastore - SCYLLADB: ScyllaDB (compatible with Apache Cassandra) """ + AWS_DYNAMODB = "aws-dynamodb" AZURE_COSMOSDB = "azure-cosmosdb" GCP_DATASTORE = "google-cloud-datastore" diff --git a/sebs/utils.py b/sebs/utils.py index c232be43..95fbab80 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -30,10 +30,10 @@ def project_absolute_path(*paths: str) -> str: """ Join paths relative to the project root directory. - + Args: *paths: Path components to join - + Returns: str: Absolute path including the project directory """ @@ -43,20 +43,21 @@ def project_absolute_path(*paths: str) -> str: class JSONSerializer(json.JSONEncoder): """ Custom JSON encoder for objects with serialize method. - + This encoder handles objects by: 1. Using their serialize() method if available 2. Converting dictionaries to strings 3. Using vars() to get object attributes 4. Falling back to string representation """ + def default(self, o): """ Custom serialization for objects. - + Args: o: Object to serialize - + Returns: JSON serializable representation of the object """ @@ -74,10 +75,10 @@ def default(self, o): def serialize(obj) -> str: """ Serialize an object to a JSON string. - + Args: obj: Object to serialize - + Returns: str: JSON string representation of the object """ @@ -90,15 +91,15 @@ def serialize(obj) -> str: def execute(cmd, shell=False, cwd=None) -> str: """ Execute a shell command and capture its output, handling errors. - + Args: cmd: Command to execute (string) shell: Whether to use shell execution (enables wildcards, pipes, etc.) cwd: Working directory for command execution - + Returns: str: Command output as string - + Raises: RuntimeError: If command execution fails """ @@ -117,7 +118,7 @@ def execute(cmd, shell=False, cwd=None) -> str: def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]) -> None: """ Update a nested dictionary with a value at the specified key path. - + Args: cfg: Dictionary to update keys: List of keys forming a path to the value @@ -133,7 +134,7 @@ def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]) -> None def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]) -> None: """ Append a dictionary to a nested location in another dictionary. 
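# A self-contained sketch of the encoder strategy that JSONSerializer describes
# above: try an object's serialize() method first, then vars(), then fall back to
# str(). The encoder and sample class here are illustrative, not SeBS's own code.
import json

class SketchEncoder(json.JSONEncoder):
    def default(self, o):
        if hasattr(o, "serialize"):
            return o.serialize()
        try:
            return vars(o)
        except TypeError:
            return str(o)

class Point:
    def __init__(self, x: int, y: int):
        self.x, self.y = x, y

print(json.dumps({"p": Point(1, 2)}, cls=SketchEncoder, indent=2))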
- + Args: cfg: Dictionary to update keys: List of keys forming a path to the value @@ -149,11 +150,11 @@ def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]) -> Non def find(name: str, path: str) -> Optional[str]: """ Find a directory with the given name in the specified path. - + Args: name: Directory name to find path: Path to search in - + Returns: str: Path to the found directory, or None if not found """ @@ -166,12 +167,12 @@ def find(name: str, path: str) -> Optional[str]: def create_output(directory: str, preserve_dir: bool, verbose: bool) -> str: """ Create or clean an output directory for benchmark results. - + Args: directory: Path to create preserve_dir: Whether to preserve existing directory verbose: Verbosity level for logging - + Returns: str: Absolute path to the output directory """ @@ -188,7 +189,7 @@ def create_output(directory: str, preserve_dir: bool, verbose: bool) -> str: def configure_logging() -> None: """ Configure global logging settings. - + Reduces noise from third-party libraries by setting their log levels to ERROR. This ensures that only important messages from these libraries are shown. """ @@ -203,14 +204,14 @@ def configure_logging() -> None: def find_benchmark(benchmark: str, path: str) -> Optional[str]: """ Locate directory corresponding to a benchmark in the repository. - - Searches for a benchmark directory in either the benchmarks or + + Searches for a benchmark directory in either the benchmarks or benchmarks-data directories. - + Args: benchmark: Benchmark name path: Path for lookup, relative to repository (usually 'benchmarks' or 'benchmarks-data') - + Returns: str: Path to benchmark directory, or None if not found """ @@ -222,7 +223,7 @@ def find_benchmark(benchmark: str, path: str) -> Optional[str]: def global_logging() -> None: """ Set up basic global logging configuration. - + Configures the root logger with a standard format, timestamp, and INFO level. This provides a baseline for all logging in the application. """ @@ -234,11 +235,11 @@ def global_logging() -> None: class ColoredWrapper: """ Wrapper for logging with colored console output. - + This class provides formatted, colorized logging output for better readability in terminal environments. It optionally propagates messages to the standard Python logger. - + Attributes: SUCCESS: Green color code for success messages STATUS: Blue color code for status/info messages @@ -247,6 +248,7 @@ class ColoredWrapper: BOLD: Bold text formatting code END: Code to reset text formatting """ + SUCCESS = "\033[92m" STATUS = "\033[94m" WARNING = "\033[93m" @@ -257,7 +259,7 @@ class ColoredWrapper: def __init__(self, prefix, logger, verbose=True, propagte=False): """ Initialize the colored logging wrapper. - + Args: prefix: Prefix for log messages (usually class name) logger: Python logger to propagate to @@ -272,7 +274,7 @@ def __init__(self, prefix, logger, verbose=True, propagte=False): def debug(self, message): """ Log a debug message. - + Args: message: The message to log """ @@ -284,7 +286,7 @@ def debug(self, message): def info(self, message): """ Log an informational message. - + Args: message: The message to log """ @@ -295,7 +297,7 @@ def info(self, message): def warning(self, message): """ Log a warning message. - + Args: message: The message to log """ @@ -306,7 +308,7 @@ def warning(self, message): def error(self, message): """ Log an error message. 
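# A minimal illustration of the colored-output idea behind ColoredWrapper above,
# reusing the ANSI codes listed in its attributes; the prefix and messages are
# invented for the example and the real wrapper adds timestamps and propagation.
SUCCESS, WARNING, END = "\033[92m", "\033[93m", "\033[0m"

def status(prefix: str, message: str, color: str) -> None:
    print(f"{color}{prefix}{END} {message}")

status("[sebs]", "deployment finished", SUCCESS)
status("[sebs]", "cache miss, rebuilding code package", WARNING)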
- + Args: message: The message to log """ @@ -317,7 +319,7 @@ def error(self, message): def critical(self, message): """ Log a critical error message. - + Args: message: The message to log """ @@ -328,7 +330,7 @@ def critical(self, message): def _print(self, message, color): """ Print a formatted message to the console. - + Args: message: The message to print color: ANSI color code to use @@ -343,19 +345,19 @@ def _print(self, message, color): class LoggingHandlers: """ Configures and manages logging handlers. - + This class sets up handlers for logging to files and tracks verbosity settings for use with ColoredWrapper. - + Attributes: handler: FileHandler for logging to a file verbosity: Whether to include debug-level messages """ - + def __init__(self, verbose: bool = False, filename: Optional[str] = None): """ Initialize logging handlers. - + Args: verbose: Whether to include debug-level messages filename: Optional file to log to @@ -379,20 +381,20 @@ def __init__(self, verbose: bool = False, filename: Optional[str] = None): class LoggingBase: """ Base class providing consistent logging functionality across the framework. - + This class sets up a logger with a unique identifier and provides methods for logging at different levels with consistent formatting. It supports both console output with color coding and optional file logging. - + Attributes: log_name: Unique identifier for this logger logging: ColoredWrapper for formatted console output """ - + def __init__(self): """ Initialize the logging base with a unique identifier. - + Creates a unique name for the logger based on class name and a random ID, then configures a standard logger and colored wrapper. """ @@ -410,7 +412,7 @@ def __init__(self): def logging(self) -> ColoredWrapper: """ Get the colored logging wrapper. - + Returns: ColoredWrapper: The logging wrapper for this instance """ @@ -422,7 +424,7 @@ def logging(self) -> ColoredWrapper: def logging_handlers(self) -> LoggingHandlers: """ Get the logging handlers configuration. - + Returns: LoggingHandlers: The current handlers configuration """ @@ -432,7 +434,7 @@ def logging_handlers(self) -> LoggingHandlers: def logging_handlers(self, handlers: LoggingHandlers): """ Set new logging handlers configuration. - + Args: handlers: The new handlers configuration to use """ @@ -453,12 +455,12 @@ def logging_handlers(self, handlers: LoggingHandlers): def has_platform(name: str) -> bool: """ Check if a specific platform is enabled via environment variable. - + Looks for SEBS_WITH_{name} environment variable set to 'true'. - + Args: name: Platform name to check - + Returns: bool: True if platform is enabled, False otherwise """ @@ -468,7 +470,7 @@ def has_platform(name: str) -> bool: def is_linux() -> bool: """ Check if the system is Linux and not Windows Subsystem for Linux. - + Returns: bool: True if native Linux, False otherwise """ @@ -478,7 +480,7 @@ def is_linux() -> bool: def catch_interrupt() -> None: """ Set up a signal handler to catch interrupt signals (Ctrl+C). - + Prints a stack trace and exits when an interrupt is received. This helps with debugging by showing the execution context at the time of the interruption. @@ -490,7 +492,7 @@ def catch_interrupt() -> None: def handler(x, y): """ Handle interrupt signal by printing stack trace and exiting. 
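# A standalone sketch of the interrupt-handling technique that catch_interrupt
# describes above: install a SIGINT handler that dumps the current stack before
# exiting. This is an illustration, not the exact handler body from sebs.utils.
import signal
import sys
import traceback

def on_interrupt(signum, frame):
    traceback.print_stack(frame)
    sys.exit(1)

signal.signal(signal.SIGINT, on_interrupt)
print("Press Ctrl+C to see the captured stack trace")
signal.pause()  # POSIX-only; blocks until a signal arrives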
- + Args: x: Signal number y: Frame object From 4d6ae784f3f3d61e79645dfd897dd3fe699924b8 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 17:09:01 +0200 Subject: [PATCH 08/21] [openwhisk] Add correct logic for loading user and cached config --- sebs/openwhisk/config.py | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index 8ddfa65c..487debfa 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -195,7 +195,7 @@ def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) ret, cached_config["resources"] ) - ret._deserialize(ret, config, cached_config) + ret._deserialize(ret, config, cached_config or {}) # Check for new config - overrides but check if it's different if "docker_registry" in config: @@ -293,7 +293,9 @@ class OpenWhiskConfig(Config): experimentalManifest: bool cache: Cache - def __init__(self, config: Dict[str, Any], cache: Cache) -> None: + def __init__( + self, resources: OpenWhiskResources, credentials: OpenWhiskCredentials, cache: Cache + ) -> None: """ Initialize OpenWhisk configuration. @@ -302,13 +304,8 @@ def __init__(self, config: Dict[str, Any], cache: Cache) -> None: cache: Cache instance for configuration persistence """ super().__init__(name="openwhisk") - self._credentials = OpenWhiskCredentials() - self._resources = OpenWhiskResources() - self.shutdownStorage = config["shutdownStorage"] - self.removeCluster = config["removeCluster"] - self.wsk_exec = config["wskExec"] - self.wsk_bypass_security = config["wskBypassSecurity"] - self.experimentalManifest = config["experimentalManifest"] + self._credentials = credentials + self._resources = resources self.cache = cache @property @@ -334,13 +331,21 @@ def resources(self) -> OpenWhiskResources: @staticmethod def initialize(cfg: Config, dct: Dict[str, Any]) -> None: """ - Initialize configuration from dictionary (currently no-op). + Initialize configuration from dictionary. 
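# A simplified sketch of the precedence rule this patch introduces for OpenWhisk:
# prefer the cached platform configuration when it exists, otherwise fall back to
# the user-provided one. The function name and config keys are illustrative, not
# the actual SeBS API.
import logging
from typing import Any, Dict, Optional

logging.basicConfig(level=logging.INFO)

def pick_config(user: Dict[str, Any], cached: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    if cached:
        logging.info("Loading cached config for OpenWhisk")
        return cached
    logging.info("Using user-provided config for OpenWhisk")
    return user

print(pick_config({"wskExec": "wsk"}, None))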
Args: cfg: Configuration instance to initialize dct: Dictionary containing initialization data """ - pass + + config = cast(OpenWhiskConfig, cfg) + config._region = dct["region"] + + config.shutdownStorage = dct["shutdownStorage"] + config.removeCluster = dct["removeCluster"] + config.wsk_exec = dct["wskExec"] + config.wsk_bypass_security = dct["wskBypassSecurity"] + config.experimentalManifest = dct["experimentalManifest"] def serialize(self) -> Dict[str, Any]: """ @@ -374,14 +379,22 @@ def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) Returns: OpenWhiskConfig instance with deserialized configuration """ - cached_config = cache.get_config("openwhisk") resources = cast( OpenWhiskResources, OpenWhiskResources.deserialize(config, cache, handlers) ) - res = OpenWhiskConfig(config, cached_config) + res = OpenWhiskConfig(resources, OpenWhiskCredentials(), cache) res.logging_handlers = handlers - res._resources = resources + + cached_config = cache.get_config("openwhisk") + + if cached_config: + res.logging.info("Loading cached config for OpenWhisk") + OpenWhiskConfig.initialize(res, cached_config) + else: + res.logging.info("Using user-provided config for GCP") + OpenWhiskConfig.initialize(res, config) + return res def update_cache(self, cache: Cache) -> None: From 4a5cf8ac370353f81d482d1f91a878dca3d64118 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 17:29:11 +0200 Subject: [PATCH 09/21] [system] Linting --- sebs/aws/triggers.py | 13 ++++++++++--- sebs/azure/azure.py | 15 ++++++++------- sebs/benchmark.py | 13 ++++++++++--- sebs/experiments/eviction_model.py | 15 ++++++++------- sebs/experiments/invocation_overhead.py | 3 ++- sebs/experiments/perf_cost.py | 10 ++++++++-- sebs/gcp/config.py | 2 +- sebs/gcp/gcp.py | 18 +----------------- sebs/openwhisk/config.py | 2 +- sebs/storage/resources.py | 18 +++++++++--------- 10 files changed, 58 insertions(+), 51 deletions(-) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f867d749..4e7e3484 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -128,7 +128,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result - def async_invoke(self, payload: dict) -> dict: + def async_invoke(self, payload: dict) -> concurrent.futures.Future: """Asynchronously invoke the Lambda function. Triggers the Lambda function asynchronously without waiting for @@ -138,13 +138,16 @@ def async_invoke(self, payload: dict) -> dict: payload: Dictionary payload to send to the function Returns: - dict: AWS Lambda invocation response + concurrent.futures.Future: Future object representing the async invocation Raises: RuntimeError: If the async invocation fails """ # FIXME: proper return type + self.logging.warning( + "Async invoke for AWS Lambda library trigger does not wait for completion!" + ) serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() ret = client.invoke( @@ -157,7 +160,11 @@ def async_invoke(self, payload: dict) -> dict: self.logging.error("Async invocation of {} failed!".format(self.name)) self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) raise RuntimeError() - return ret + + # Create a completed future with the result + future: concurrent.futures.Future = concurrent.futures.Future() + future.set_result(ret) + return future def serialize(self) -> dict: """Serialize the trigger to a dictionary. 
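# A minimal demonstration of the pattern adopted in async_invoke above: wrap an
# already-available result in a concurrent.futures.Future so callers get a uniform
# future-based interface. The payload is invented; this is not the trigger code.
import concurrent.futures

def async_like_call() -> concurrent.futures.Future:
    result = {"StatusCode": 202}  # assume the underlying call already returned
    future: concurrent.futures.Future = concurrent.futures.Future()
    future.set_result(result)
    return future

print(async_like_call().result())  # -> {'StatusCode': 202}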
diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index e88cf9db..b5be2243 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -323,8 +323,8 @@ def publish_function( ) ) url = "" - for line in ret.split(b"\n"): - line = line.decode("utf-8") + ret_str = ret.decode("utf-8") + for line in ret_str.split("\n"): if "Invoke url:" in line: url = line.split("Invoke url:")[1].strip() break @@ -761,7 +761,7 @@ def download_metrics( invocations_to_process = set(requests.keys()) # while len(invocations_processed) < len(requests.keys()): self.logging.info("Azure: Running App Insights query.") - ret = self.cli_instance.execute( + ret_bytes = self.cli_instance.execute( ( 'az monitor app-insights query --app {} --analytics-query "{}" ' "--start-time {} {} --end-time {} {}" @@ -773,11 +773,12 @@ def download_metrics( end_time_str, timezone_str, ) - ).decode("utf-8") - ret = json.loads(ret) - ret = ret["tables"][0] + ) + ret_str = ret_bytes.decode("utf-8") + json_data = json.loads(ret_str) + table_data = json_data["tables"][0] # time is last, invocation is second to last - for request in ret["rows"]: + for request in table_data["rows"]: invocation_id = request[-2] # might happen that we get invocation from another experiment if invocation_id not in requests: diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 7fedfd5c..2eb20685 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -179,6 +179,8 @@ class Benchmark(LoggingBase): """ + _hash_value: Optional[str] + @staticmethod def typename() -> str: """ @@ -207,6 +209,7 @@ def benchmark_path(self) -> str: Returns: str: Path to the benchmark directory """ + assert self._benchmark_path is not None return self._benchmark_path @property @@ -220,13 +223,14 @@ def benchmark_config(self) -> BenchmarkConfig: return self._benchmark_config @property - def code_package(self) -> dict: + def code_package(self) -> Dict[str, Any]: """ Get the code package information. 
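# A small, standalone sketch of the decode-then-scan fix shown in publish_function
# above: decode the CLI output once, then search the text for the published URL.
# The sample output below is invented for illustration.
ret = (
    b"Functions in example-app:\n"
    b"    handler - [httpTrigger]\n"
    b"        Invoke url: https://example-app.azurewebsites.net/api/handler\n"
)
url = ""
for line in ret.decode("utf-8").split("\n"):
    if "Invoke url:" in line:
        url = line.split("Invoke url:")[1].strip()
        break
print(url)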
Returns: - dict: Dictionary with code package information + Dict[str, Any]: Dictionary with code package information """ + assert self._code_package is not None return self._code_package @property @@ -237,6 +241,7 @@ def functions(self) -> Dict[str, Any]: Returns: Dict[str, Any]: Dictionary of functions """ + assert self._functions is not None return self._functions @property @@ -250,6 +255,7 @@ def code_location(self) -> str: if self.code_package: return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) else: + assert self._code_location is not None return self._code_location @property @@ -474,7 +480,7 @@ def __init__( self._cache_client = cache_client self._docker_client = docker_client self._system_config = system_config - self._hash_value = None + self._code_location: Optional[str] = None self._output_dir = os.path.join( output_dir, f"{benchmark}_code", @@ -980,6 +986,7 @@ def prepare_input( # buckets = mod.buckets_count() # storage.allocate_buckets(self.benchmark, buckets) # Get JSON and upload data as required by benchmark + assert self._benchmark_data_path is not None input_config = self._benchmark_input_module.generate_input( self._benchmark_data_path, size, bucket, input, output, storage_func, nosql_func ) diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 1d70d0b6..9a2b2357 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -14,9 +14,10 @@ import os import time from datetime import datetime -from typing import List, Optional, Tuple, TYPE_CHECKING +from typing import List, Optional, Tuple, TYPE_CHECKING, Dict, Any import multiprocessing from multiprocessing.pool import AsyncResult, ThreadPool +from typing import cast from sebs.faas.system import System as FaaSSystem from sebs.faas.function import Function, Trigger @@ -341,7 +342,7 @@ def run(self) -> None: # flake8 issue # https://github.com/PyCQA/pycodestyle/issues/373 functions = self.functions[invocation_idx :: self.function_copies_per_time] # noqa - results = {} + results: Dict[int, List[List[Dict[str, Any]]]] = {} # Disable logging - otherwise we have RLock that can't get be pickled for func in functions: @@ -374,8 +375,8 @@ def run(self) -> None: # time.sleep(5) for _, t in enumerate(self.times): results[t].append([]) - local_results = [] - servers_results = [] + local_results: List[AsyncResult] = [] + servers_results: List[AsyncResult] = [] """ Start M server instances. Each one handles one set of invocations. 
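# A compact sketch of the fan-out pattern used in this experiment: submit work to a
# ThreadPool, keep the AsyncResult handles, then collect results with .get(), which
# re-raises worker exceptions. The worker function and counts are placeholders.
from multiprocessing.pool import AsyncResult, ThreadPool
from typing import List

def invoke(idx: int) -> dict:
    return {"idx": idx, "ok": True}

with ThreadPool(processes=4) as pool:
    handles: List[AsyncResult] = [pool.apply_async(invoke, (i,)) for i in range(8)]
    results = [h.get() for h in handles]

print(len(results))  # -> 8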
@@ -406,11 +407,11 @@ def run(self) -> None: Rethrow exceptions if appear """ for result in servers_results: - ret = result.get() + servers_ret = result.get() for result in local_results: - ret = result.get() - for i, val in enumerate(ret): + local_ret = result.get() + for i, val in enumerate(local_ret): results[self.times[i]][-1].append(val) """ diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index f5e3d70c..9ded288f 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -18,7 +18,7 @@ import random import time from datetime import datetime -from typing import Dict, List, TYPE_CHECKING +from typing import Dict, List, TYPE_CHECKING, Union from sebs.benchmark import Benchmark from sebs.faas.system import System as FaaSSystem @@ -220,6 +220,7 @@ def run(self) -> None: repetitions = self.settings["repetitions"] N = self.settings["N"] + experiment: Union[CodePackageSize, PayloadSize] if self.settings["type"] == "code": experiment = CodePackageSize(self._deployment_client, self._benchmark, self.settings) else: diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 0e5b7cb0..faa669a5 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -172,7 +172,12 @@ def run(self) -> None: self.logging.info(f"Begin experiment on memory size {memory}") # Update function memory configuration self._function.config.memory = memory - self._deployment_client.update_function(self._function, self._benchmark, False, "") + self._deployment_client.update_function( + self._function, + self._benchmark, + self._benchmark.container_deployment, + self._benchmark.container_uri if self._benchmark.container_deployment else "", + ) self._sebs_client.cache_client.update_function(self._function) # Run experiment with this memory configuration self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) @@ -485,7 +490,8 @@ def process( times = experiments.times() deployment_client.download_metrics( func, - *times, + int(times[0]), + int(times[1]), experiments.invocations(func), experiments.metrics(func), ) diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 84b04c9b..a6094dc6 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -363,7 +363,7 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Confi GCPConfig.initialize(config_obj, config) # mypy makes a mistake here - updated_keys: List[Tuple[str, Tuple[str]]] = [["region", ["gcp", "region"]]] # type: ignore + updated_keys: List[Tuple[str, List[str]]] = [("region", ["gcp", "region"])] # type: ignore # for each attribute here, check if its version is different than the one provided by # user; if yes, then update the value for config_key, keys in updated_keys: diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index c13253cf..64709ae3 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -19,7 +19,7 @@ Example: Basic GCP system initialization: - + config = GCPConfig(credentials, resources) gcp_system = GCP(system_config, config, cache, docker_client, logging_handlers) gcp_system.initialize() @@ -736,22 +736,6 @@ def get_full_function_name(project_name: str, location: str, func_name: str) -> """ return f"projects/{project_name}/locations/{location}/functions/{func_name}" - def prepare_experiment(self, benchmark: str) -> str: - """Prepare storage resources for benchmark experiment. - - Creates a dedicated storage bucket for experiment logs and outputs. 
- - Args: - benchmark: Name of the benchmark being prepared - - Returns: - Name of the created logs storage bucket - """ - logs_bucket = self._system_resources.get_storage().add_output_bucket( - benchmark, suffix="logs" - ) - return logs_bucket - def shutdown(self) -> None: """Shutdown the GCP system and clean up resources. diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index 487debfa..4fa5fbea 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -13,7 +13,7 @@ from __future__ import annotations -from typing import Optional, cast, Dict, Any +from typing import Optional, cast, Dict from sebs.cache import Cache from sebs.faas.config import Credentials, Resources, Config diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index dea7145a..2704d4d6 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -18,7 +18,7 @@ """ import docker -from typing import cast, Dict, Optional, Tuple +from typing import cast, Dict, Optional, Tuple, Any from sebs.cache import Cache from sebs.faas.config import Config, Resources @@ -84,13 +84,13 @@ def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: """ return self._nosql_storage - def serialize(self) -> Dict[str, any]: + def serialize(self) -> Dict[str, Any]: """Serialize the resource configuration to a dictionary. Returns: - Dict[str, any]: Serialized configuration containing storage and/or nosql sections + Dict[str, Any]: Serialized configuration containing storage and/or nosql sections """ - out: Dict[str, any] = {} + out: Dict[str, Any] = {} if self._object_storage is not None: out = {**out, "storage": self._object_storage.serialize()} @@ -120,8 +120,8 @@ def update_cache(self, cache: Cache) -> None: ) def _deserialize_storage( - self, config: Dict[str, any], cached_config: Optional[Dict[str, any]], storage_type: str - ) -> Tuple[str, Dict[str, any]]: + self, config: Dict[str, Any], cached_config: Optional[Dict[str, Any]], storage_type: str + ) -> Tuple[str, Dict[str, Any]]: """Deserialize storage configuration from config or cache. Attempts to load storage configuration from the provided config first, @@ -133,10 +133,10 @@ def _deserialize_storage( storage_type: Type of storage to deserialize ('object' or 'nosql') Returns: - Tuple[str, Dict[str, any]]: Storage implementation name and configuration + Tuple[str, Dict[str, Any]]: Storage implementation name and configuration """ storage_impl = "" - storage_config: Dict[str, any] = {} + storage_config: Dict[str, Any] = {} # Check for new config if "storage" in config and storage_type in config["storage"]: @@ -165,7 +165,7 @@ def _deserialize_storage( @staticmethod def _deserialize( - ret: "SelfHostedResources", config: Dict[str, any], cached_config: Dict[str, any] + ret: "SelfHostedResources", config: Dict[str, Any], cached_config: Optional[Dict[str, Any]] ) -> None: """Deserialize storage configurations from config and cache data. 
From 611d162dd291f242fd7a64231bca4a4aff9b833d Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 17:55:53 +0200 Subject: [PATCH 10/21] [docs] Update sphinx config --- docs/source/api/sebs.aws.rst | 26 +++++++++++++++++----- docs/source/api/sebs.azure.rst | 32 ++++++++++++++++++++++++---- docs/source/api/sebs.experiments.rst | 20 ++++++++--------- docs/source/api/sebs.faas.rst | 26 +++++++++++++++++----- docs/source/api/sebs.gcp.rst | 32 ++++++++++++++++++++++++---- docs/source/api/sebs.local.rst | 18 +++++----------- docs/source/api/sebs.openwhisk.rst | 18 +++++----------- docs/source/api/sebs.rst | 24 +++++++-------------- docs/source/api/sebs.storage.rst | 20 +++++++++++++++-- docs/source/conf.py | 26 +++++++++++----------- 10 files changed, 156 insertions(+), 86 deletions(-) diff --git a/docs/source/api/sebs.aws.rst b/docs/source/api/sebs.aws.rst index 23b3df24..e0e5eaf1 100644 --- a/docs/source/api/sebs.aws.rst +++ b/docs/source/api/sebs.aws.rst @@ -9,53 +9,69 @@ sebs.aws.aws module .. automodule:: sebs.aws.aws :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.aws.config module ---------------------- .. automodule:: sebs.aws.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.aws.container module ------------------------- .. automodule:: sebs.aws.container :members: + :show-inheritance: :undoc-members: + +sebs.aws.dynamodb module +------------------------ + +.. automodule:: sebs.aws.dynamodb + :members: :show-inheritance: + :undoc-members: sebs.aws.function module ------------------------ .. automodule:: sebs.aws.function :members: + :show-inheritance: :undoc-members: + +sebs.aws.resources module +------------------------- + +.. automodule:: sebs.aws.resources + :members: :show-inheritance: + :undoc-members: sebs.aws.s3 module ------------------ .. automodule:: sebs.aws.s3 :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.aws.triggers module ------------------------ .. automodule:: sebs.aws.triggers :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.aws :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.azure.rst b/docs/source/api/sebs.azure.rst index d172aabf..059bfb90 100644 --- a/docs/source/api/sebs.azure.rst +++ b/docs/source/api/sebs.azure.rst @@ -9,53 +9,77 @@ sebs.azure.azure module .. automodule:: sebs.azure.azure :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.azure.blob\_storage module ------------------------------- .. automodule:: sebs.azure.blob_storage :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.azure.cli module --------------------- .. automodule:: sebs.azure.cli :members: + :show-inheritance: :undoc-members: + +sebs.azure.cloud\_resources module +---------------------------------- + +.. automodule:: sebs.azure.cloud_resources + :members: :show-inheritance: + :undoc-members: sebs.azure.config module ------------------------ .. automodule:: sebs.azure.config :members: + :show-inheritance: :undoc-members: + +sebs.azure.cosmosdb module +-------------------------- + +.. automodule:: sebs.azure.cosmosdb + :members: :show-inheritance: + :undoc-members: sebs.azure.function module -------------------------- .. automodule:: sebs.azure.function :members: + :show-inheritance: :undoc-members: + +sebs.azure.system\_resources module +----------------------------------- + +.. 
automodule:: sebs.azure.system_resources + :members: :show-inheritance: + :undoc-members: sebs.azure.triggers module -------------------------- .. automodule:: sebs.azure.triggers :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.azure :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.experiments.rst b/docs/source/api/sebs.experiments.rst index 96e753e6..bc3ee1ab 100644 --- a/docs/source/api/sebs.experiments.rst +++ b/docs/source/api/sebs.experiments.rst @@ -9,77 +9,77 @@ sebs.experiments.config module .. automodule:: sebs.experiments.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.environment module ----------------------------------- .. automodule:: sebs.experiments.environment :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.eviction\_model module --------------------------------------- .. automodule:: sebs.experiments.eviction_model :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.experiment module ---------------------------------- .. automodule:: sebs.experiments.experiment :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.invocation\_overhead module -------------------------------------------- .. automodule:: sebs.experiments.invocation_overhead :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.network\_ping\_pong module ------------------------------------------- .. automodule:: sebs.experiments.network_ping_pong :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.perf\_cost module ---------------------------------- .. automodule:: sebs.experiments.perf_cost :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.result module ------------------------------ .. automodule:: sebs.experiments.result :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.experiments.startup\_time module ------------------------------------- .. automodule:: sebs.experiments.startup_time :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.experiments :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.faas.rst b/docs/source/api/sebs.faas.rst index 56258845..61c33331 100644 --- a/docs/source/api/sebs.faas.rst +++ b/docs/source/api/sebs.faas.rst @@ -9,45 +9,61 @@ sebs.faas.config module .. automodule:: sebs.faas.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.faas.container module -------------------------- .. automodule:: sebs.faas.container :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.faas.function module ------------------------- .. automodule:: sebs.faas.function :members: + :show-inheritance: :undoc-members: + +sebs.faas.nosql module +---------------------- + +.. automodule:: sebs.faas.nosql + :members: :show-inheritance: + :undoc-members: + +sebs.faas.resources module +-------------------------- + +.. automodule:: sebs.faas.resources + :members: + :show-inheritance: + :undoc-members: sebs.faas.storage module ------------------------ .. automodule:: sebs.faas.storage :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.faas.system module ----------------------- .. 
automodule:: sebs.faas.system :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.faas :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.gcp.rst b/docs/source/api/sebs.gcp.rst index ffcd9920..eb4e93e4 100644 --- a/docs/source/api/sebs.gcp.rst +++ b/docs/source/api/sebs.gcp.rst @@ -4,50 +4,74 @@ sebs.gcp package Submodules ---------- +sebs.gcp.cli module +------------------- + +.. automodule:: sebs.gcp.cli + :members: + :show-inheritance: + :undoc-members: + sebs.gcp.config module ---------------------- .. automodule:: sebs.gcp.config :members: + :show-inheritance: :undoc-members: + +sebs.gcp.datastore module +------------------------- + +.. automodule:: sebs.gcp.datastore + :members: :show-inheritance: + :undoc-members: sebs.gcp.function module ------------------------ .. automodule:: sebs.gcp.function :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.gcp.gcp module ------------------- .. automodule:: sebs.gcp.gcp :members: + :show-inheritance: :undoc-members: + +sebs.gcp.resources module +------------------------- + +.. automodule:: sebs.gcp.resources + :members: :show-inheritance: + :undoc-members: sebs.gcp.storage module ----------------------- .. automodule:: sebs.gcp.storage :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.gcp.triggers module ------------------------ .. automodule:: sebs.gcp.triggers :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.gcp :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.local.rst b/docs/source/api/sebs.local.rst index 32a34143..569956ad 100644 --- a/docs/source/api/sebs.local.rst +++ b/docs/source/api/sebs.local.rst @@ -9,53 +9,45 @@ sebs.local.config module .. automodule:: sebs.local.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.local.deployment module ---------------------------- .. automodule:: sebs.local.deployment :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.local.function module -------------------------- .. automodule:: sebs.local.function :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.local.local module ----------------------- .. automodule:: sebs.local.local :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.local.measureMem module ---------------------------- .. automodule:: sebs.local.measureMem :members: - :undoc-members: :show-inheritance: - -sebs.local.storage module -------------------------- - -.. automodule:: sebs.local.storage - :members: :undoc-members: - :show-inheritance: Module contents --------------- .. automodule:: sebs.local :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.openwhisk.rst b/docs/source/api/sebs.openwhisk.rst index 2588e594..7b68da2d 100644 --- a/docs/source/api/sebs.openwhisk.rst +++ b/docs/source/api/sebs.openwhisk.rst @@ -9,53 +9,45 @@ sebs.openwhisk.config module .. automodule:: sebs.openwhisk.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.openwhisk.container module ------------------------------- .. automodule:: sebs.openwhisk.container :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.openwhisk.function module ------------------------------ .. 
automodule:: sebs.openwhisk.function :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.openwhisk.openwhisk module ------------------------------- .. automodule:: sebs.openwhisk.openwhisk :members: - :undoc-members: :show-inheritance: - -sebs.openwhisk.storage module ------------------------------ - -.. automodule:: sebs.openwhisk.storage - :members: :undoc-members: - :show-inheritance: sebs.openwhisk.triggers module ------------------------------ .. automodule:: sebs.openwhisk.triggers :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs.openwhisk :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.rst b/docs/source/api/sebs.rst index a400e0f6..db708110 100644 --- a/docs/source/api/sebs.rst +++ b/docs/source/api/sebs.rst @@ -24,77 +24,69 @@ sebs.benchmark module .. automodule:: sebs.benchmark :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.cache module ----------------- .. automodule:: sebs.cache :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.config module ------------------ .. automodule:: sebs.config :members: - :undoc-members: :show-inheritance: - -sebs.regression module ----------------------- - -.. automodule:: sebs.regression - :members: :undoc-members: - :show-inheritance: sebs.sebs module ---------------- .. automodule:: sebs.sebs :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.statistics module ---------------------- .. automodule:: sebs.statistics :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.types module ----------------- .. automodule:: sebs.types :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.utils module ----------------- .. automodule:: sebs.utils :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.version module ------------------- .. automodule:: sebs.version :members: - :undoc-members: :show-inheritance: + :undoc-members: Module contents --------------- .. automodule:: sebs :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.storage.rst b/docs/source/api/sebs.storage.rst index dcd2f765..aebb9aa6 100644 --- a/docs/source/api/sebs.storage.rst +++ b/docs/source/api/sebs.storage.rst @@ -9,21 +9,37 @@ sebs.storage.config module .. automodule:: sebs.storage.config :members: - :undoc-members: :show-inheritance: + :undoc-members: sebs.storage.minio module ------------------------- .. automodule:: sebs.storage.minio :members: + :show-inheritance: + :undoc-members: + +sebs.storage.resources module +----------------------------- + +.. automodule:: sebs.storage.resources + :members: + :show-inheritance: :undoc-members: + +sebs.storage.scylladb module +---------------------------- + +.. automodule:: sebs.storage.scylladb + :members: :show-inheritance: + :undoc-members: Module contents --------------- .. 
automodule:: sebs.storage :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/conf.py b/docs/source/conf.py index 2fb1da58..d61af4cf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -8,28 +8,26 @@ import os import sys -sys.path.insert(0, os.path.abspath('../..')) -#sys.path.insert(0, os.path.abspath('../../sebs')) -project = 'sebs' -copyright = '2024, Marcin Copik' -author = 'Marcin Copik' -release = '1.2' +sys.path.insert(0, os.path.abspath("../..")) +# sys.path.insert(0, os.path.abspath('../../sebs')) + +project = "sebs" +copyright = "2024, Marcin Copik" +author = "Marcin Copik" +release = "1.2" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode' -] +# extensions = ["sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.viewcode"] +extensions = ["sphinx.ext.napoleon", "sphinx.ext.viewcode"] -templates_path = ['_templates'] +templates_path = ["_templates"] exclude_patterns = [] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] From 7858853b3c4dadcd3521f32b94389063935d8182 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 17:56:04 +0200 Subject: [PATCH 11/21] [syhstem] Lintiong --- sebs/openwhisk/config.py | 2 +- sebs/openwhisk/openwhisk.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index 4fa5fbea..487debfa 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -13,7 +13,7 @@ from __future__ import annotations -from typing import Optional, cast, Dict +from typing import Optional, cast, Dict, Any from sebs.cache import Cache from sebs.faas.config import Credentials, Resources, Config diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index b18d1d86..29a51178 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -12,7 +12,7 @@ import os import subprocess -from typing import cast, Dict, List, Optional, Tuple, Type, Any +from typing import cast, Dict, List, Optional, Tuple, Type import docker From b764274e3ab11ab2a3a52a5c94ee41cd586a067c Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 23 Jun 2025 19:24:50 +0200 Subject: [PATCH 12/21] [docs] Add YAML config --- .readthedocs.yaml | 28 ++++++++++++++++++++++++++++ requirements.docs.txt | 2 ++ 2 files changed, 30 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 requirements.docs.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..0ee7c56a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,28 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 
+python: + install: + - requirements: requirements.docs.txt + - requirements: requirements.txt + - requirements: requirements.aws.txt + - requirements: requirements.azure.txt + - requirements: requirements.gcp.txt + - requirements: requirements.local.txt + diff --git a/requirements.docs.txt b/requirements.docs.txt new file mode 100644 index 00000000..c17ae307 --- /dev/null +++ b/requirements.docs.txt @@ -0,0 +1,2 @@ +Sphinx==8.2.3 +sphinx-rtd-theme==3.0.2 From d67d27512767e1032ee981cca5b794cf675e2fd9 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 00:37:19 +0200 Subject: [PATCH 13/21] [system] Linting --- .readthedocs.yaml | 2 +- sebs/azure/__init__.py | 7 +++---- sebs/azure/azure.py | 8 ++++---- sebs/azure/blob_storage.py | 7 +++---- sebs/azure/cli.py | 10 ++++------ sebs/azure/config.py | 4 ++-- sebs/azure/cosmosdb.py | 8 ++++---- sebs/azure/function.py | 6 +++--- sebs/azure/triggers.py | 8 ++++---- sebs/experiments/eviction_model.py | 3 +-- sebs/faas/nosql.py | 6 +++--- sebs/gcp/__init__.py | 8 ++++---- sebs/gcp/cli.py | 3 +-- sebs/gcp/config.py | 2 +- sebs/gcp/datastore.py | 2 +- sebs/gcp/function.py | 2 +- sebs/gcp/resources.py | 2 +- sebs/gcp/storage.py | 2 +- sebs/gcp/triggers.py | 5 ++--- sebs/openwhisk/container.py | 15 +++++++++++---- sebs/openwhisk/triggers.py | 5 ++++- sebs/regression.py | 3 ++- sebs/storage/__init__.py | 6 +++--- sebs/storage/config.py | 2 +- sebs/storage/minio.py | 7 ++++--- sebs/storage/scylladb.py | 3 ++- 26 files changed, 71 insertions(+), 65 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 0ee7c56a..9b27f696 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -12,7 +12,7 @@ build: # Build documentation in the "docs/" directory with Sphinx sphinx: - configuration: docs/source/conf.py + configuration: docs/source/conf.py # Optionally, but recommended, # declare the Python requirements required to build your documentation diff --git a/sebs/azure/__init__.py b/sebs/azure/__init__.py index 394fabb2..5f66bfbd 100644 --- a/sebs/azure/__init__.py +++ b/sebs/azure/__init__.py @@ -20,17 +20,16 @@ Example: Basic usage for Azure benchmarking: - ```python from sebs.azure import Azure, AzureConfig - + # Load configuration config = AzureConfig.deserialize(config_dict, cache, handlers) - + # Initialize Azure system azure = Azure(sebs_config, config, cache, docker_client, handlers) azure.initialize() - + # Deploy and benchmark functions function = azure.create_function(code_package, func_name, False, "") result = function.invoke(payload) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index b5be2243..f62634b3 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -17,15 +17,15 @@ Example: Basic usage for Azure benchmarking: - + ```python from sebs.azure.azure import Azure from sebs.azure.config import AzureConfig - + # Initialize Azure system with configuration azure_system = Azure(sebs_config, azure_config, cache, docker_client, handlers) azure_system.initialize() - + # Deploy and benchmark functions function = azure_system.create_function(code_package, func_name, False, "") result = function.invoke(payload) @@ -729,7 +729,7 @@ def download_metrics( resource_group = self.config.resources.resource_group(self.cli_instance) # Avoid warnings in the next step - ret = self.cli_instance.execute( + self.cli_instance.execute( "az feature register --name AIWorkspacePreview " "--namespace microsoft.insights" ) app_id_query = self.cli_instance.execute( diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py 
index 90211181..e45a86c5 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -10,16 +10,15 @@ Example: Basic usage for Azure Blob Storage: - + ```python from sebs.azure.blob_storage import BlobStorage - + # Initialize with connection string storage = BlobStorage(region, cache, resources, connection_string, False) - + # Upload benchmark data storage.upload(container_name, filepath, key) - # Download results storage.download(container_name, key, local_filepath) ``` diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index 4995c282..2817f96f 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -10,19 +10,18 @@ Example: Basic usage for Azure CLI operations: - + ```python from sebs.azure.cli import AzureCLI - + # Initialize CLI container cli = AzureCLI(system_config, docker_client) - + # Login to Azure cli.login(app_id, tenant, password) - # Execute Azure CLI commands result = cli.execute("az group list") - + # Upload function package cli.upload_package(local_dir, container_dest) ``` @@ -32,7 +31,6 @@ import logging import os import tarfile -from typing import Optional import docker diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 0229a5de..6bec4020 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -12,11 +12,11 @@ Example: Basic usage for setting up Azure configuration: - + ```python from sebs.azure.config import AzureConfig, AzureCredentials, AzureResources from sebs.cache import Cache - + # Load configuration from config dict and cache config = AzureConfig.deserialize(config_dict, cache, handlers) credentials = config.credentials diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 8cb48688..c4d5a428 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -10,17 +10,17 @@ Example: Basic usage for CosmosDB operations: - + ```python from sebs.azure.cosmosdb import CosmosDB - + # Initialize CosmosDB with account cosmosdb = CosmosDB(cache, resources, cosmosdb_account) - + # Set up benchmark database and containers db_name = cosmosdb.benchmark_database("my-benchmark") tables = cosmosdb.get_tables("my-benchmark") - + # Perform operations credentials = cosmosdb.credentials() ``` diff --git a/sebs/azure/function.py b/sebs/azure/function.py index a4501320..d522ce14 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -9,16 +9,16 @@ Example: Basic usage for creating an Azure Function: - + ```python from sebs.azure.function import AzureFunction from sebs.azure.config import AzureResources from sebs.faas.function import FunctionConfig - + # Create function with Azure-specific storage function = AzureFunction( name="my-function", - benchmark="test-benchmark", + benchmark="test-benchmark", code_hash="abc123", function_storage=storage_account, cfg=function_config diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 0710fab4..54cfb4ab 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -9,16 +9,16 @@ Example: Basic usage for HTTP trigger: - + ```python from sebs.azure.triggers import HTTPTrigger - + # Create HTTP trigger with function URL trigger = HTTPTrigger(function_url, data_storage_account) - + # Synchronous invocation result = trigger.sync_invoke(payload) - + # Asynchronous invocation future = trigger.async_invoke(payload) result = future.result() diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 9a2b2357..7ca90126 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -17,7 +17,6 @@ from typing 
import List, Optional, Tuple, TYPE_CHECKING, Dict, Any import multiprocessing from multiprocessing.pool import AsyncResult, ThreadPool -from typing import cast from sebs.faas.system import System as FaaSSystem from sebs.faas.function import Function, Trigger @@ -407,7 +406,7 @@ def run(self) -> None: Rethrow exceptions if appear """ for result in servers_results: - servers_ret = result.get() + result.get() for result in local_results: local_ret = result.get() diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 045ffeef..835d7063 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -137,7 +137,7 @@ def envs(self) -> dict: """ Table naming convention and implementation requirements. - + Each table name follows this pattern: sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} @@ -176,7 +176,7 @@ def create_benchmark_tables( """ Platform-specific table implementations: - + - AWS: DynamoDB Table - Azure: CosmosDB Container - Google Cloud: Firestore in Datastore Mode, Database @@ -223,7 +223,7 @@ def write_to_table( """ Table management operations: - + - AWS DynamoDB: Removing & recreating table is the cheapest & fastest option - Azure CosmosDB: Recreate container - Google Cloud: Also likely recreate diff --git a/sebs/gcp/__init__.py b/sebs/gcp/__init__.py index 032b66b6..9e5b0f3d 100644 --- a/sebs/gcp/__init__.py +++ b/sebs/gcp/__init__.py @@ -25,16 +25,16 @@ Example: Basic GCP system setup: - + from sebs.gcp import GCP, GCPConfig - + # Configure GCP with credentials config = GCPConfig.deserialize(config_dict, cache, handlers) - + # Initialize GCP system gcp_system = GCP(system_config, config, cache, docker_client, handlers) gcp_system.initialize() - + # Deploy a function function = gcp_system.create_function(benchmark, "my-function", False, "") """ diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index 206f8d02..96fa9c32 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -9,7 +9,7 @@ Example: Using the gcloud CLI interface: - + cli = GCloudCLI(credentials, system_config, docker_client) cli.login(project_name) result = cli.execute("gcloud functions list") @@ -18,7 +18,6 @@ import logging import os -from typing import Union import docker diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index a6094dc6..873735eb 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -17,7 +17,7 @@ Example: Basic GCP configuration setup: - + credentials = GCPCredentials("/path/to/service-account.json") resources = GCPResources() config = GCPConfig(credentials, resources) diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index a5f2bdc3..7c511527 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -10,7 +10,7 @@ Example: Using Datastore for benchmark NoSQL operations: - + datastore = Datastore(cli_instance, cache, resources, region) table_name = datastore.create_table("benchmark-name", "user-data", "user_id") datastore.write_to_table("benchmark-name", table_name, data, primary_key, secondary_key) diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index b9b21a90..ee56507d 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -9,7 +9,7 @@ Example: Creating a GCP function instance: - + config = FunctionConfig(memory=256, timeout=60, runtime="python39") function = GCPFunction("my-function", "benchmark-name", "hash123", config) """ diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index df6a9b2b..c4234f6b 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -10,7 +10,7 @@ Example: Creating and using GCP system resources: - + 
resources = GCPSystemResources(system_config, gcp_config, cache, docker_client, handlers) storage = resources.get_storage(replace_existing=False) datastore = resources.get_nosql_storage() diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 1f532c62..e992139d 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -9,7 +9,7 @@ Example: Using GCP storage for benchmark files: - + storage = GCPStorage(region, cache, resources, replace_existing=False) bucket = storage.add_benchmark_bucket("my-benchmark") storage.upload(bucket, "/path/to/file.zip", "benchmark-code.zip") diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 9ac4b239..3a4924c6 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -10,12 +10,11 @@ Example: Using a library trigger for direct invocation: - + trigger = LibraryTrigger("my-function", gcp_client) result = trigger.sync_invoke({"input": "data"}) - Using an HTTP trigger: - + trigger = HTTPTrigger("https://region-project.cloudfunctions.net/my-function") result = trigger.sync_invoke({"input": "data"}) """ diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index 5a6ec9c2..d6e35108 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -29,8 +29,12 @@ class OpenWhiskContainer(DockerContainer): config: OpenWhisk configuration containing registry settings Example: - >>> container = OpenWhiskContainer(sys_config, ow_config, docker_client, True) - >>> registry, repo, tag, uri = container.registry_name("benchmark", "python", "3.8", "x86_64") + >>> container = OpenWhiskContainer( + ... sys_config, ow_config, docker_client, True + ... ) + >>> registry, repo, tag, uri = container.registry_name( + ... "benchmark", "python", "3.8", "x86_64" + ... ) """ @staticmethod @@ -96,8 +100,11 @@ def registry_name( - Complete image URI Example: - >>> registry, repo, tag, uri = container.registry_name("test", "python", "3.8", "x86_64") - >>> # Returns: ("Docker Hub", "sebs", "openwhisk-test-python-3.8-x86_64", "sebs:openwhisk-test-python-3.8-x86_64") + >>> registry, repo, tag, uri = container.registry_name( + ... "test", "python", "3.8", "x86_64" + ... ) + >>> # Returns: ("Docker Hub", "sebs", "openwhisk-test-python-3.8-x86_64", + ... "sebs:openwhisk-test-python-3.8-x86_64") """ registry_name = self.config.resources.docker_registry diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py index 055bb65f..03956565 100644 --- a/sebs/openwhisk/triggers.py +++ b/sebs/openwhisk/triggers.py @@ -200,7 +200,10 @@ class HTTPTrigger(Trigger): url: HTTP URL for the web action endpoint Example: - >>> trigger = HTTPTrigger("my-function", "https://openwhisk.example.com/api/v1/web/guest/default/my-function.json") + >>> trigger = HTTPTrigger( + ... "my-function", + ... "https://openwhisk.example.com/api/v1/web/guest/default/my-function.json" + ... ) >>> result = trigger.sync_invoke({"key": "value"}) """ diff --git a/sebs/regression.py b/sebs/regression.py index 7282482f..a4fc6114 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -751,7 +751,8 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): # Stream result handler for concurrent test execution -# Based on https://stackoverflow.com/questions/22484805/a-simple-working-example-for-testtools-concurrentstreamtestsuite +# Based on https://stackoverflow.com/questions/22484805/ +# a-simple-working-example-for-testtools-concurrentstreamtestsuite class TracingStreamResult(testtools.StreamResult): """Stream result handler for concurrent test execution. 
diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py index 6afc41e8..d4346656 100644 --- a/sebs/storage/__init__.py +++ b/sebs/storage/__init__.py @@ -13,17 +13,17 @@ Key Components: - config: Configuration dataclasses for storage backends - - minio: MinIO-based object storage implementation + - minio: MinIO-based object storage implementation - scylladb: ScyllaDB-based NoSQL storage implementation - resources: Resource management for self-hosted storage deployments Example: To use MinIO object storage in a benchmark: - + ```python from sebs.storage.minio import Minio from sebs.storage.config import MinioConfig - + # Configure and start MinIO config = MinioConfig(mapped_port=9000, version="latest") storage = Minio(docker_client, cache_client, resources, False) diff --git a/sebs/storage/config.py b/sebs/storage/config.py index 3b4b217e..d6fca392 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -7,7 +7,7 @@ Key Classes: PersistentStorageConfig: Abstract base for object storage configurations - MinioConfig: Configuration for MinIO S3-compatible object storage + MinioConfig: Configuration for MinIO S3-compatible object storage NoSQLStorageConfig: Abstract base for NoSQL database configurations ScyllaDBConfig: Configuration for ScyllaDB DynamoDB-compatible storage diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index ace127ac..aa4eede4 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -231,7 +231,8 @@ def configure_connection(self) -> None: f"{json.dumps(self._storage_container.attrs, indent=2)}" ) raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" + f"Incorrect detection of IP address for container with id " + f"{self._cfg.instance_id}" ) self.logging.info("Starting minio instance at {}".format(self._cfg.address)) @@ -497,9 +498,9 @@ def serialize(self) -> Dict[str, Any]: """ Deserialization and inheritance support - + This implementation supports overriding this class. The main Minio class - is used to start/stop deployments. When overriding the implementation in + is used to start/stop deployments. When overriding the implementation in Local/OpenWhisk/..., we call the _deserialize method and provide an alternative implementation type. 
""" diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index c299fc45..512be703 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -226,7 +226,8 @@ def configure_connection(self) -> None: f"{json.dumps(self._storage_container.attrs, indent=2)}" ) raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._cfg.instance_id}" + f"Incorrect detection of IP address for container with id " + f"{self._cfg.instance_id}" ) self.logging.info("Starting ScyllaDB instance at {}".format(self._cfg.address)) From 7d597e1ee1a8ef3ca55300cf1f9c2a9786b7551c Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 13:35:28 +0200 Subject: [PATCH 14/21] [docs] Remove duplicated elements --- docs/source/api/sebs.aws.rst | 8 ++++++++ docs/source/api/sebs.azure.rst | 9 +++++++++ docs/source/api/sebs.experiments.rst | 9 +++++++++ docs/source/api/sebs.faas.rst | 7 +++++++ docs/source/api/sebs.gcp.rst | 8 ++++++++ docs/source/api/sebs.local.rst | 5 +++++ docs/source/api/sebs.openwhisk.rst | 5 +++++ docs/source/api/sebs.rst | 6 +++++- docs/source/api/sebs.storage.rst | 4 ++++ docs/source/conf.py | 9 +++++++-- 10 files changed, 67 insertions(+), 3 deletions(-) diff --git a/docs/source/api/sebs.aws.rst b/docs/source/api/sebs.aws.rst index e0e5eaf1..e8a6f7d2 100644 --- a/docs/source/api/sebs.aws.rst +++ b/docs/source/api/sebs.aws.rst @@ -11,6 +11,7 @@ sebs.aws.aws module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.config module ---------------------- @@ -19,6 +20,7 @@ sebs.aws.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.container module ------------------------- @@ -27,6 +29,7 @@ sebs.aws.container module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.dynamodb module ------------------------ @@ -35,6 +38,7 @@ sebs.aws.dynamodb module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.function module ------------------------ @@ -43,6 +47,7 @@ sebs.aws.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.resources module ------------------------- @@ -51,6 +56,7 @@ sebs.aws.resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.s3 module ------------------ @@ -59,6 +65,7 @@ sebs.aws.s3 module :members: :show-inheritance: :undoc-members: + :no-index: sebs.aws.triggers module ------------------------ @@ -67,6 +74,7 @@ sebs.aws.triggers module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.azure.rst b/docs/source/api/sebs.azure.rst index 059bfb90..e3f2c47a 100644 --- a/docs/source/api/sebs.azure.rst +++ b/docs/source/api/sebs.azure.rst @@ -11,6 +11,7 @@ sebs.azure.azure module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.blob\_storage module ------------------------------- @@ -19,6 +20,7 @@ sebs.azure.blob\_storage module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.cli module --------------------- @@ -27,6 +29,7 @@ sebs.azure.cli module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.cloud\_resources module ---------------------------------- @@ -35,6 +38,7 @@ sebs.azure.cloud\_resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.config module ------------------------ @@ -43,6 +47,7 @@ sebs.azure.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.cosmosdb module -------------------------- @@ -51,6 
+56,7 @@ sebs.azure.cosmosdb module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.function module -------------------------- @@ -59,6 +65,7 @@ sebs.azure.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.system\_resources module ----------------------------------- @@ -67,6 +74,7 @@ sebs.azure.system\_resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.azure.triggers module -------------------------- @@ -75,6 +83,7 @@ sebs.azure.triggers module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.experiments.rst b/docs/source/api/sebs.experiments.rst index bc3ee1ab..fd234d9e 100644 --- a/docs/source/api/sebs.experiments.rst +++ b/docs/source/api/sebs.experiments.rst @@ -11,6 +11,7 @@ sebs.experiments.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.environment module ----------------------------------- @@ -19,6 +20,7 @@ sebs.experiments.environment module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.eviction\_model module --------------------------------------- @@ -27,6 +29,7 @@ sebs.experiments.eviction\_model module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.experiment module ---------------------------------- @@ -35,6 +38,7 @@ sebs.experiments.experiment module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.invocation\_overhead module -------------------------------------------- @@ -43,6 +47,7 @@ sebs.experiments.invocation\_overhead module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.network\_ping\_pong module ------------------------------------------- @@ -51,6 +56,7 @@ sebs.experiments.network\_ping\_pong module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.perf\_cost module ---------------------------------- @@ -59,6 +65,7 @@ sebs.experiments.perf\_cost module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.result module ------------------------------ @@ -67,6 +74,7 @@ sebs.experiments.result module :members: :show-inheritance: :undoc-members: + :no-index: sebs.experiments.startup\_time module ------------------------------------- @@ -75,6 +83,7 @@ sebs.experiments.startup\_time module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.faas.rst b/docs/source/api/sebs.faas.rst index 61c33331..592f3676 100644 --- a/docs/source/api/sebs.faas.rst +++ b/docs/source/api/sebs.faas.rst @@ -11,6 +11,7 @@ sebs.faas.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.container module -------------------------- @@ -19,6 +20,7 @@ sebs.faas.container module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.function module ------------------------- @@ -27,6 +29,7 @@ sebs.faas.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.nosql module ---------------------- @@ -35,6 +38,7 @@ sebs.faas.nosql module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.resources module -------------------------- @@ -43,6 +47,7 @@ sebs.faas.resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.storage module ------------------------ @@ -51,6 +56,7 @@ sebs.faas.storage module :members: :show-inheritance: :undoc-members: + :no-index: sebs.faas.system 
module ----------------------- @@ -59,6 +65,7 @@ sebs.faas.system module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.gcp.rst b/docs/source/api/sebs.gcp.rst index eb4e93e4..724e9249 100644 --- a/docs/source/api/sebs.gcp.rst +++ b/docs/source/api/sebs.gcp.rst @@ -11,6 +11,7 @@ sebs.gcp.cli module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.config module ---------------------- @@ -19,6 +20,7 @@ sebs.gcp.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.datastore module ------------------------- @@ -27,6 +29,7 @@ sebs.gcp.datastore module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.function module ------------------------ @@ -35,6 +38,7 @@ sebs.gcp.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.gcp module ------------------- @@ -43,6 +47,7 @@ sebs.gcp.gcp module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.resources module ------------------------- @@ -51,6 +56,7 @@ sebs.gcp.resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.storage module ----------------------- @@ -59,6 +65,7 @@ sebs.gcp.storage module :members: :show-inheritance: :undoc-members: + :no-index: sebs.gcp.triggers module ------------------------ @@ -67,6 +74,7 @@ sebs.gcp.triggers module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.local.rst b/docs/source/api/sebs.local.rst index 569956ad..7e5fa7c1 100644 --- a/docs/source/api/sebs.local.rst +++ b/docs/source/api/sebs.local.rst @@ -11,6 +11,7 @@ sebs.local.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.local.deployment module ---------------------------- @@ -19,6 +20,7 @@ sebs.local.deployment module :members: :show-inheritance: :undoc-members: + :no-index: sebs.local.function module -------------------------- @@ -27,6 +29,7 @@ sebs.local.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.local.local module ----------------------- @@ -35,6 +38,7 @@ sebs.local.local module :members: :show-inheritance: :undoc-members: + :no-index: sebs.local.measureMem module ---------------------------- @@ -43,6 +47,7 @@ sebs.local.measureMem module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.openwhisk.rst b/docs/source/api/sebs.openwhisk.rst index 7b68da2d..4904e22d 100644 --- a/docs/source/api/sebs.openwhisk.rst +++ b/docs/source/api/sebs.openwhisk.rst @@ -11,6 +11,7 @@ sebs.openwhisk.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.openwhisk.container module ------------------------------- @@ -19,6 +20,7 @@ sebs.openwhisk.container module :members: :show-inheritance: :undoc-members: + :no-index: sebs.openwhisk.function module ------------------------------ @@ -27,6 +29,7 @@ sebs.openwhisk.function module :members: :show-inheritance: :undoc-members: + :no-index: sebs.openwhisk.openwhisk module ------------------------------- @@ -35,6 +38,7 @@ sebs.openwhisk.openwhisk module :members: :show-inheritance: :undoc-members: + :no-index: sebs.openwhisk.triggers module ------------------------------ @@ -43,6 +47,7 @@ sebs.openwhisk.triggers module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/api/sebs.rst b/docs/source/api/sebs.rst index 
db708110..ccdd32e9 100644 --- a/docs/source/api/sebs.rst +++ b/docs/source/api/sebs.rst @@ -24,8 +24,9 @@ sebs.benchmark module .. automodule:: sebs.benchmark :members: - :show-inheritance: :undoc-members: + :show-inheritance: + :no-index: sebs.cache module ----------------- @@ -34,6 +35,7 @@ sebs.cache module :members: :show-inheritance: :undoc-members: + :no-index: sebs.config module ------------------ @@ -50,6 +52,7 @@ sebs.sebs module :members: :show-inheritance: :undoc-members: + :no-index: sebs.statistics module ---------------------- @@ -74,6 +77,7 @@ sebs.utils module :members: :show-inheritance: :undoc-members: + :no-index: sebs.version module ------------------- diff --git a/docs/source/api/sebs.storage.rst b/docs/source/api/sebs.storage.rst index aebb9aa6..d6cb81ab 100644 --- a/docs/source/api/sebs.storage.rst +++ b/docs/source/api/sebs.storage.rst @@ -11,6 +11,7 @@ sebs.storage.config module :members: :show-inheritance: :undoc-members: + :no-index: sebs.storage.minio module ------------------------- @@ -19,6 +20,7 @@ sebs.storage.minio module :members: :show-inheritance: :undoc-members: + :no-index: sebs.storage.resources module ----------------------------- @@ -27,6 +29,7 @@ sebs.storage.resources module :members: :show-inheritance: :undoc-members: + :no-index: sebs.storage.scylladb module ---------------------------- @@ -35,6 +38,7 @@ sebs.storage.scylladb module :members: :show-inheritance: :undoc-members: + :no-index: Module contents --------------- diff --git a/docs/source/conf.py b/docs/source/conf.py index d61af4cf..3cafa3c2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,12 +20,17 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -# extensions = ["sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.viewcode"] -extensions = ["sphinx.ext.napoleon", "sphinx.ext.viewcode"] +extensions = ["sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.viewcode"] templates_path = ["_templates"] exclude_patterns = [] +# -- Autodoc configuration -------------------------------------------------- +# Let RST files control documentation generation explicitly to avoid duplicates + +# Suppress duplicate object warnings +suppress_warnings = ['autosectionlabel.*'] + # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output From f26d03d41d16545a87bc31b81db4d28103e79cf1 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 13:38:33 +0200 Subject: [PATCH 15/21] [docs] Removing warnings --- sebs/aws/aws.py | 12 ++++++------ sebs/azure/__init__.py | 22 +++++++++++----------- sebs/azure/azure.py | 20 +++++++++----------- sebs/azure/blob_storage.py | 18 +++++++++--------- sebs/azure/cli.py | 22 +++++++++++----------- sebs/azure/config.py | 18 +++++++++--------- sebs/azure/cosmosdb.py | 20 ++++++++++---------- sebs/azure/function.py | 28 ++++++++++++++-------------- sebs/azure/triggers.py | 20 ++++++++++---------- sebs/benchmark.py | 2 ++ 10 files changed, 91 insertions(+), 91 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 24f68d2e..12aeaff8 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -180,13 +180,13 @@ def package_code( """ Package code for deployment to AWS Lambda. 
- Creates a suitable deployment package with the following structure: + Creates a suitable deployment package with the following structure:: - function/ - - function.py - - storage.py - - resources/ - handler.py + function/ + - function.py + - storage.py + - resources/ + handler.py It would be sufficient to just pack the code and ship it as zip to AWS. However, to have a compatible function implementation across providers, diff --git a/sebs/azure/__init__.py b/sebs/azure/__init__.py index 5f66bfbd..dba97945 100644 --- a/sebs/azure/__init__.py +++ b/sebs/azure/__init__.py @@ -20,20 +20,20 @@ Example: Basic usage for Azure benchmarking: - ```python - from sebs.azure import Azure, AzureConfig + :: - # Load configuration - config = AzureConfig.deserialize(config_dict, cache, handlers) + from sebs.azure import Azure, AzureConfig - # Initialize Azure system - azure = Azure(sebs_config, config, cache, docker_client, handlers) - azure.initialize() + # Load configuration + config = AzureConfig.deserialize(config_dict, cache, handlers) - # Deploy and benchmark functions - function = azure.create_function(code_package, func_name, False, "") - result = function.invoke(payload) - ``` + # Initialize Azure system + azure = Azure(sebs_config, config, cache, docker_client, handlers) + azure.initialize() + + # Deploy and benchmark functions + function = azure.create_function(code_package, func_name, False, "") + result = function.invoke(payload) """ from .azure import Azure # noqa diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index f62634b3..c0d63aea 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -16,20 +16,18 @@ functionality for serverless function benchmarking. Example: - Basic usage for Azure benchmarking: + Basic usage for Azure benchmarking:: - ```python - from sebs.azure.azure import Azure - from sebs.azure.config import AzureConfig + from sebs.azure.azure import Azure + from sebs.azure.config import AzureConfig - # Initialize Azure system with configuration - azure_system = Azure(sebs_config, azure_config, cache, docker_client, handlers) - azure_system.initialize() + # Initialize Azure system with configuration + azure_system = Azure(sebs_config, azure_config, cache, docker_client, handlers) + azure_system.initialize() - # Deploy and benchmark functions - function = azure_system.create_function(code_package, func_name, False, "") - result = function.invoke(payload) - ``` + # Deploy and benchmark functions + function = azure_system.create_function(code_package, func_name, False, "") + result = function.invoke(payload) """ import datetime diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index e45a86c5..31044194 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -11,17 +11,17 @@ Example: Basic usage for Azure Blob Storage: - ```python - from sebs.azure.blob_storage import BlobStorage + :: - # Initialize with connection string - storage = BlobStorage(region, cache, resources, connection_string, False) + from sebs.azure.blob_storage import BlobStorage - # Upload benchmark data - storage.upload(container_name, filepath, key) - # Download results - storage.download(container_name, key, local_filepath) - ``` + # Initialize with connection string + storage = BlobStorage(region, cache, resources, connection_string, False) + + # Upload benchmark data + storage.upload(container_name, filepath, key) + # Download results + storage.download(container_name, key, local_filepath) """ import os diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index 
2817f96f..f1e8c884 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -11,20 +11,20 @@ Example: Basic usage for Azure CLI operations: - ```python - from sebs.azure.cli import AzureCLI + :: - # Initialize CLI container - cli = AzureCLI(system_config, docker_client) + from sebs.azure.cli import AzureCLI - # Login to Azure - cli.login(app_id, tenant, password) - # Execute Azure CLI commands - result = cli.execute("az group list") + # Initialize CLI container + cli = AzureCLI(system_config, docker_client) - # Upload function package - cli.upload_package(local_dir, container_dest) - ``` + # Login to Azure + cli.login(app_id, tenant, password) + # Execute Azure CLI commands + result = cli.execute("az group list") + + # Upload function package + cli.upload_package(local_dir, container_dest) """ import io diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 6bec4020..08dcb0aa 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -13,15 +13,15 @@ Example: Basic usage for setting up Azure configuration: - ```python - from sebs.azure.config import AzureConfig, AzureCredentials, AzureResources - from sebs.cache import Cache - - # Load configuration from config dict and cache - config = AzureConfig.deserialize(config_dict, cache, handlers) - credentials = config.credentials - resources = config.resources - ``` + :: + + from sebs.azure.config import AzureConfig, AzureCredentials, AzureResources + from sebs.cache import Cache + + # Load configuration from config dict and cache + config = AzureConfig.deserialize(config_dict, cache, handlers) + credentials = config.credentials + resources = config.resources """ import json diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index c4d5a428..abc3e923 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -11,19 +11,19 @@ Example: Basic usage for CosmosDB operations: - ```python - from sebs.azure.cosmosdb import CosmosDB + :: - # Initialize CosmosDB with account - cosmosdb = CosmosDB(cache, resources, cosmosdb_account) + from sebs.azure.cosmosdb import CosmosDB - # Set up benchmark database and containers - db_name = cosmosdb.benchmark_database("my-benchmark") - tables = cosmosdb.get_tables("my-benchmark") + # Initialize CosmosDB with account + cosmosdb = CosmosDB(cache, resources, cosmosdb_account) - # Perform operations - credentials = cosmosdb.credentials() - ``` + # Set up benchmark database and containers + db_name = cosmosdb.benchmark_database("my-benchmark") + tables = cosmosdb.get_tables("my-benchmark") + + # Perform operations + credentials = cosmosdb.credentials() """ from dataclasses import dataclass diff --git a/sebs/azure/function.py b/sebs/azure/function.py index d522ce14..72006341 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -10,20 +10,20 @@ Example: Basic usage for creating an Azure Function: - ```python - from sebs.azure.function import AzureFunction - from sebs.azure.config import AzureResources - from sebs.faas.function import FunctionConfig - - # Create function with Azure-specific storage - function = AzureFunction( - name="my-function", - benchmark="test-benchmark", - code_hash="abc123", - function_storage=storage_account, - cfg=function_config - ) - ``` + :: + + from sebs.azure.function import AzureFunction + from sebs.azure.config import AzureResources + from sebs.faas.function import FunctionConfig + + # Create function with Azure-specific storage + function = AzureFunction( + name="my-function", + benchmark="test-benchmark", + code_hash="abc123", + 
function_storage=storage_account, + cfg=function_config + ) """ from sebs.azure.config import AzureResources diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 54cfb4ab..d903fa09 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -10,19 +10,19 @@ Example: Basic usage for HTTP trigger: - ```python - from sebs.azure.triggers import HTTPTrigger + :: - # Create HTTP trigger with function URL - trigger = HTTPTrigger(function_url, data_storage_account) + from sebs.azure.triggers import HTTPTrigger - # Synchronous invocation - result = trigger.sync_invoke(payload) + # Create HTTP trigger with function URL + trigger = HTTPTrigger(function_url, data_storage_account) - # Asynchronous invocation - future = trigger.async_invoke(payload) - result = future.result() - ``` + # Synchronous invocation + result = trigger.sync_invoke(payload) + + # Asynchronous invocation + future = trigger.async_invoke(payload) + result = future.result() """ import concurrent.futures diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 2eb20685..3aa7ebb8 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -38,6 +38,7 @@ class BenchmarkConfig: timeout, memory allocation, supported languages, and included modules. Attributes: + timeout: Maximum execution time in seconds memory: Memory allocation in MB languages: List of supported programming languages @@ -152,6 +153,7 @@ class Benchmark(LoggingBase): images corresponding to the target cloud deployment. The behavior of the class depends on cache state: + 1. If there's no cache entry, a code package is built 2. Otherwise, the hash of the entire benchmark is computed and compared with the cached value. If changed, it rebuilds the benchmark From c3652e2637f17e3237b71fae97e7fbc561236fe2 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 13:49:33 +0200 Subject: [PATCH 16/21] [docs] Add table of content --- docs/source/api/sebs.aws.rst | 4 ++++ docs/source/api/sebs.azure.rst | 4 ++++ docs/source/api/sebs.experiments.rst | 4 ++++ docs/source/api/sebs.faas.rst | 4 ++++ docs/source/api/sebs.gcp.rst | 4 ++++ docs/source/api/sebs.local.rst | 4 ++++ docs/source/api/sebs.openwhisk.rst | 4 ++++ docs/source/api/sebs.rst | 4 ++++ docs/source/api/sebs.storage.rst | 4 ++++ 9 files changed, 36 insertions(+) diff --git a/docs/source/api/sebs.aws.rst b/docs/source/api/sebs.aws.rst index e8a6f7d2..43486972 100644 --- a/docs/source/api/sebs.aws.rst +++ b/docs/source/api/sebs.aws.rst @@ -1,6 +1,10 @@ sebs.aws package ================ +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.azure.rst b/docs/source/api/sebs.azure.rst index e3f2c47a..e8966380 100644 --- a/docs/source/api/sebs.azure.rst +++ b/docs/source/api/sebs.azure.rst @@ -1,6 +1,10 @@ sebs.azure package ================== +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.experiments.rst b/docs/source/api/sebs.experiments.rst index fd234d9e..1b68b2cd 100644 --- a/docs/source/api/sebs.experiments.rst +++ b/docs/source/api/sebs.experiments.rst @@ -1,6 +1,10 @@ sebs.experiments package ======================== +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.faas.rst b/docs/source/api/sebs.faas.rst index 592f3676..4bfa5c9f 100644 --- a/docs/source/api/sebs.faas.rst +++ b/docs/source/api/sebs.faas.rst @@ -1,6 +1,10 @@ sebs.faas package ================= +.. 
contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.gcp.rst b/docs/source/api/sebs.gcp.rst index 724e9249..fb389006 100644 --- a/docs/source/api/sebs.gcp.rst +++ b/docs/source/api/sebs.gcp.rst @@ -1,6 +1,10 @@ sebs.gcp package ================ +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.local.rst b/docs/source/api/sebs.local.rst index 7e5fa7c1..38da3a92 100644 --- a/docs/source/api/sebs.local.rst +++ b/docs/source/api/sebs.local.rst @@ -1,6 +1,10 @@ sebs.local package ================== +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.openwhisk.rst b/docs/source/api/sebs.openwhisk.rst index 4904e22d..eaab1422 100644 --- a/docs/source/api/sebs.openwhisk.rst +++ b/docs/source/api/sebs.openwhisk.rst @@ -1,6 +1,10 @@ sebs.openwhisk package ====================== +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- diff --git a/docs/source/api/sebs.rst b/docs/source/api/sebs.rst index ccdd32e9..84afa8ab 100644 --- a/docs/source/api/sebs.rst +++ b/docs/source/api/sebs.rst @@ -1,6 +1,10 @@ sebs package ============ +.. contents:: Table of Contents + :local: + :depth: 2 + Subpackages ----------- diff --git a/docs/source/api/sebs.storage.rst b/docs/source/api/sebs.storage.rst index d6cb81ab..a57e381a 100644 --- a/docs/source/api/sebs.storage.rst +++ b/docs/source/api/sebs.storage.rst @@ -1,6 +1,10 @@ sebs.storage package ==================== +.. contents:: Table of Contents + :local: + :depth: 2 + Submodules ---------- From b8216282760eaed5032e199c89c75f7b399a6c30 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 16:15:22 +0200 Subject: [PATCH 17/21] [docs] Manual corrections to generated docstrings --- sebs/aws/dynamodb.py | 7 +++++-- sebs/aws/function.py | 2 +- sebs/aws/s3.py | 12 ++++++++---- sebs/azure/azure.py | 10 ++++++++-- sebs/azure/blob_storage.py | 6 +++++- sebs/azure/cli.py | 10 ++++++++-- 6 files changed, 35 insertions(+), 12 deletions(-) diff --git a/sebs/aws/dynamodb.py b/sebs/aws/dynamodb.py index bbea3e73..39fd6d94 100644 --- a/sebs/aws/dynamodb.py +++ b/sebs/aws/dynamodb.py @@ -180,10 +180,13 @@ def create_table( ) -> str: """Create a DynamoDB table for benchmark data. - Creates a DynamoDB table with a unique name for the benchmark. Unlike - Azure (account -> database -> container) and GCP (database per benchmark), + Creates a unique DynamoDB table name using resource ID, benchmark name, and provided name. + Unlike Azure (account -> database -> container) and GCP (database per benchmark), AWS requires unique table names across the account. + The function handles cases where the table already exists or is being created. + Uses PAY_PER_REQUEST billing mode. + Args: benchmark: Name of the benchmark name: Logical table name diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 10aac845..baa2917d 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -19,7 +19,7 @@ class LambdaFunction(Function): This class represents an AWS Lambda function in the serverless benchmarking suite. It extends the base Function class with AWS-specific attributes and - functionality. + functionality, like resource ARN, role, and optional bucket for code deployment. 
Attributes: arn: Amazon Resource Name of the Lambda function diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index bc0d5b61..3055aacc 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -98,7 +98,7 @@ def __init__( self.cached = False def correct_name(self, name: str) -> str: - """Correct bucket name for S3 naming requirements. + """No correction is needed for S3 bucket name. Args: name: Original bucket name @@ -109,7 +109,7 @@ def correct_name(self, name: str) -> str: return name def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: """Create an S3 bucket with the specified name. @@ -129,6 +129,9 @@ def _create_bucket( ClientError: If bucket creation fails for other reasons RuntimeError: If bucket already exists in us-east-1 region """ + if buckets is None: + buckets = [] + for bucket_name in buckets: if name in bucket_name: self.logging.info( @@ -179,8 +182,9 @@ def _create_bucket( def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: """Upload a file to S3 with caching and replacement logic. - Handles the upload of benchmark files with appropriate caching behavior - and replacement logic based on configuration. + Handles the upload of benchmark files with appropriate caching behavior: + skips upload if using cached buckets and not replacing existing files, + and we know that the file is already uploaded. Args: path_idx: Index of the input path configuration diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index c0d63aea..b1b20a27 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -172,7 +172,8 @@ def find_deployments(self) -> List[str]: """Find existing SeBS deployments by scanning resource groups. Looks for Azure resource groups matching the SeBS naming pattern - to identify existing deployments that can be reused. + - sebs_resource_group_(.*) - to identify existing deployments + that can be reused. Returns: List of deployment identifiers found in resource groups. @@ -296,6 +297,8 @@ def publish_function( Deploys the packaged function code to Azure Functions using the Azure Functions CLI tools. Handles retries and URL extraction. + Will repeat on failure, which is useful to handle delays in + Azure cache updates - it can take between 30 and 60 seconds. Args: function: Function instance to publish @@ -378,6 +381,8 @@ def update_function( Updates an existing Azure Function with new code package, including environment variables and function configuration. + It also ensures an HTTP trigger is correctly associated with + the function's URL. Args: function: Function instance to update @@ -813,7 +818,8 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) """Enforce cold start for multiple functions. Forces cold start behavior for all provided functions by updating - environment variables and waiting for changes to propagate. + environment variables and waiting for changes to propagate: + sleep is added to allow changes to propagate. 
Args: functions: List of functions to enforce cold start for diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index 31044194..b682fa46 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -86,7 +86,7 @@ def __init__( self.client: BlobServiceClient = BlobServiceClient.from_connection_string(conn_string) def _create_bucket( - self, name: str, containers: List[str] = [], randomize_name: bool = False + self, name: str, containers: Optional[List[str]] = None, randomize_name: bool = False ) -> str: """Create new Azure Blob Storage container. @@ -101,6 +101,10 @@ def _create_bucket( Returns: Name of the created or existing container. """ + + if containers is None: + containers = [] + for c in containers: if name in c: self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index f1e8c884..b7648202 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -166,8 +166,14 @@ def upload_package(self, directory: str, dest: str) -> None: Note: This implementation loads the entire archive into memory, - which may not be efficient for very large function packages. - For large packages, consider using docker cp directly. + This is an inefficient and memory-intensive implementation. + So far, we didn't have very large functions that require many gigabytes. + docker-py does not support a straightforward copy and we can't + call put_archive with chunks. + + For large packages, there are two potential solutions: + (1) manually call docker cp and decompress + (2) commit the docker container and restart with a new mounted volume. Args: directory: Local directory containing function package From afa2945bee7be2f3ce4136f4509a696b8c998334 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 16:18:29 +0200 Subject: [PATCH 18/21] [system] Adapt API of storage to avoid passing mutable arguments --- sebs/benchmark.py | 2 +- sebs/faas/storage.py | 2 +- sebs/gcp/storage.py | 6 +++++- sebs/storage/minio.py | 6 +++++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 3aa7ebb8..9757c0a5 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -153,7 +153,7 @@ class Benchmark(LoggingBase): images corresponding to the target cloud deployment. The behavior of the class depends on cache state: - + 1. If there's no cache entry, a code package is built 2. Otherwise, the hash of the entire benchmark is computed and compared with the cached value. If changed, it rebuilds the benchmark diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 43165baa..dc4cc544 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -75,7 +75,7 @@ def find_deployments(self) -> List[str]: @abstractmethod def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: pass diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index e992139d..3d0a5e86 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -94,7 +94,7 @@ def correct_name(self, name: str) -> str: return name def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: """Create a new Cloud Storage bucket or return existing one. 
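The signature changes in this patch replace mutable default arguments (`buckets: List[str] = []`) with a `None` sentinel resolved inside the function body, as the following hunk does for `_create_bucket`. A short standalone sketch of the pitfall being avoided and the idiom applied here (illustrative names, not SeBS code):

```python
from typing import List, Optional


def broken(name: str, buckets: List[str] = []) -> List[str]:
    # The default list is created once at function definition time and
    # shared across all calls, so earlier names leak into later calls.
    buckets.append(name)
    return buckets


def fixed(name: str, buckets: Optional[List[str]] = None) -> List[str]:
    # A fresh list is created on every call that omits the argument.
    if buckets is None:
        buckets = []
    buckets.append(name)
    return buckets


print(broken("a"))  # ['a']
print(broken("b"))  # ['a', 'b']  <- state shared between calls
print(fixed("a"))   # ['a']
print(fixed("b"))   # ['b']
```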
@@ -106,6 +106,10 @@ def _create_bucket( Returns: Name of the created or existing bucket """ + + if buckets is None: + buckets = [] + found_bucket = False for bucket_name in buckets: if name in bucket_name: diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index aa4eede4..54b2665a 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -272,7 +272,7 @@ def get_connection(self) -> minio.Minio: ) def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: """ Create a new bucket if it doesn't already exist. @@ -291,6 +291,10 @@ def _create_bucket( Raises: minio.error.ResponseError: If bucket creation fails """ + + if buckets is None: + buckets = [] + # Check if bucket already exists for bucket_name in buckets: if name in bucket_name: From 695e62d362b76d59152497650a5e785096b8cd32 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 16:49:18 +0200 Subject: [PATCH 19/21] [docs] More docstrings --- sebs/azure/cloud_resources.py | 130 ++++++++++++-- sebs/azure/cosmosdb.py | 187 +++++++++++++++++-- sebs/azure/system_resources.py | 120 +++++++++++-- sebs/benchmark.py | 298 +++++++++++++++++++++++++++---- sebs/faas/config.py | 317 ++++++++++++++++++++++++++++----- sebs/faas/container.py | 111 ++++++++++++ sebs/faas/resources.py | 81 +++++++-- sebs/faas/storage.py | 107 ++++++++++- 8 files changed, 1212 insertions(+), 139 deletions(-) diff --git a/sebs/azure/cloud_resources.py b/sebs/azure/cloud_resources.py index 51cc80e2..5476c552 100644 --- a/sebs/azure/cloud_resources.py +++ b/sebs/azure/cloud_resources.py @@ -1,29 +1,64 @@ +"""Azure cloud resource management for SeBS. + +This module manages deployed special resources in Azure cloud, particularly +CosmosDB accounts that require special handling for authentication and +configuration management. +""" + import json -from typing import Optional +from typing import Dict, Optional from sebs.azure.cli import AzureCLI -""" - Keep a list of deployed special resources in Azure cloud. - Currently, we have here CosmosDB accounts that require special handling. -""" +class CosmosDBAccount: + """Azure CosmosDB account configuration and management. + Manages CosmosDB account information including account name, endpoint URL, + and authentication credentials. Provides methods for querying account + details from Azure and serialization for caching. + + Attributes: + _account_name (str): Name of the CosmosDB account + _url (str): Document endpoint URL for the account + _credential (str): Primary master key for authentication + """ -class CosmosDBAccount: @property def account_name(self) -> str: + """Get the CosmosDB account name. + + Returns: + str: The name of the CosmosDB account. + """ return self._account_name @property def url(self) -> str: + """Get the CosmosDB document endpoint URL. + + Returns: + str: The document endpoint URL for the CosmosDB account. + """ return self._url @property def credential(self) -> str: + """Get the CosmosDB authentication credential. + + Returns: + str: The primary master key for CosmosDB authentication. + """ return self._credential - def __init__(self, account_name: str, url: str, credential: str): + def __init__(self, account_name: str, url: str, credential: str) -> None: + """Initialize CosmosDB account configuration. 
+ + Args: + account_name (str): Name of the CosmosDB account + url (str): Document endpoint URL for the account + credential (str): Primary master key for authentication + """ super().__init__() self._account_name = account_name self._url = url @@ -31,13 +66,36 @@ def __init__(self, account_name: str, url: str, credential: str): @staticmethod def from_cache(account_name: str, url: str, credential: str) -> "CosmosDBAccount": + """Create CosmosDB account instance from cached data. + + Args: + account_name (str): Name of the CosmosDB account + url (str): Document endpoint URL for the account + credential (str): Primary master key for authentication + + Returns: + CosmosDBAccount: New instance with provided configuration. + """ return CosmosDBAccount(account_name, url, credential) @staticmethod def from_allocation( - account_name: str, resource_group: str, cli_instance: AzureCLI, url: Optional[str] + account_name: str, resource_group: str, cli_instance: AzureCLI, url: Optional[str] = None ) -> "CosmosDBAccount": + """Create CosmosDB account instance by querying Azure. + + Queries Azure CLI to retrieve account configuration including + endpoint URL and authentication credentials. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + url (Optional[str]): Pre-known URL, if None will query from Azure + + Returns: + CosmosDBAccount: New instance with queried configuration. + """ if url is None: url = CosmosDBAccount.query_url( account_name, @@ -55,7 +113,23 @@ def from_allocation( @staticmethod def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: + """Query CosmosDB account endpoint URL from Azure. + + Uses Azure CLI to retrieve the document endpoint URL for the + specified CosmosDB account. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + + Returns: + str: The document endpoint URL for the CosmosDB account. + + Raises: + RuntimeError: If Azure CLI command fails. + KeyError: If the expected response structure is not found. + """ # Find the endpoint URL ret = cli_instance.execute( f" az cosmosdb show --name {account_name} " f" --resource-group {resource_group} " @@ -65,7 +139,23 @@ def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> @staticmethod def query_credentials(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: + """Query CosmosDB account authentication credentials from Azure. + + Uses Azure CLI to retrieve the primary master key for the + specified CosmosDB account. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + + Returns: + str: The primary master key for CosmosDB authentication. + + Raises: + RuntimeError: If Azure CLI command fails. + KeyError: If the expected response structure is not found. 
+ """ # Read the master key to access CosmosDB account ret = cli_instance.execute( f" az cosmosdb keys list --name {account_name} " f" --resource-group {resource_group} " @@ -75,7 +165,15 @@ def query_credentials(account_name: str, resource_group: str, cli_instance: Azur return credential - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """Serialize CosmosDB account configuration to dictionary. + + Returns: + Dict[str, str]: Dictionary containing account configuration with keys: + - account_name: The CosmosDB account name + - url: The document endpoint URL + - credential: The primary master key + """ return { "account_name": self._account_name, "url": self._url, @@ -83,5 +181,17 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(obj: dict) -> "CosmosDBAccount": + def deserialize(obj: Dict[str, str]) -> "CosmosDBAccount": + """Deserialize CosmosDB account configuration from dictionary. + + Args: + obj (Dict[str, str]): Dictionary containing account configuration + with required keys: account_name, url, credential + + Returns: + CosmosDBAccount: New instance with deserialized configuration. + + Raises: + KeyError: If required keys are missing from the dictionary. + """ return CosmosDBAccount.from_cache(obj["account_name"], obj["url"], obj["credential"]) diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index abc3e923..71835b05 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -79,15 +79,56 @@ def deserialize(config: dict) -> "BenchmarkResources": class CosmosDB(NoSQLStorage): + """Azure CosmosDB implementation for NoSQL storage in SeBS benchmarking. + + This class provides Azure CosmosDB integration for NoSQL benchmarks, + handling database and container management, data operations, and + resource lifecycle. It supports benchmark-specific database allocation + and container creation with proper caching and error handling. + + Azure CosmosDB uses a different model than traditional NoSQL databases: + - Each benchmark gets its own database + - Container names match benchmark table names directly + - No table mappings are required + - Partition keys are configured per container + + Attributes: + _cli_instance: Azure CLI instance for CosmosDB operations + _resource_group: Name of Azure resource group containing CosmosDB + _benchmark_resources: Dict mapping benchmark names to their resources + _cosmos_client: CosmosDB client for database operations + _cosmosdb_account: CosmosDB account configuration and credentials + """ + @staticmethod def typename() -> str: + """Get the storage type name. + + Returns: + String identifier for Azure CosmosDB storage type. + """ return "Azure.CosmosDB" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + String identifier for Azure deployment. + """ return "azure" - def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources, region: str): + def __init__( + self, cli: AzureCLI, cache_client: Cache, resources: AzureResources, region: str + ) -> None: + """Initialize CosmosDB storage handler. 
+ + Args: + cli: Azure CLI instance for executing CosmosDB operations + cache_client: Cache instance for storing/retrieving configurations + resources: Azure resources manager for resource allocation + region: Azure region for resource placement + """ super().__init__(region, cache_client, resources) self._cli_instance = cli self._resource_group = resources.resource_group(self._cli_instance) @@ -96,15 +137,33 @@ def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources self._cosmos_client: Optional[CosmosClient] = None self._cosmosdb_account: Optional[CosmosDBAccount] = None - """ - Azure requires no table mappings: the name of container is the same as benchmark name. - """ - def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table mappings for benchmark. + + Azure requires no table mappings since container names match + benchmark table names directly. + + Args: + benchmark: Name of the benchmark + + Returns: + Empty dictionary as no mappings are needed for Azure CosmosDB. + """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual table name for a benchmark table. + + Validates that the table exists in the benchmark's containers + and returns the table name if found. + Args: + benchmark: Name of the benchmark + table: Logical table name to resolve + + Returns: + Actual table name if found, None if benchmark or table doesn't exist. + """ if benchmark not in self._benchmark_resources: return None @@ -114,7 +173,17 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve benchmark resources from cache. + + Attempts to load cached benchmark resources including database + and container information from the filesystem cache. + Args: + benchmark: Name of the benchmark to retrieve from cache + + Returns: + True if cache was found and loaded, False otherwise. + """ if benchmark in self._benchmark_resources: return True @@ -125,14 +194,28 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update benchmark resources in cache. + + Persists current benchmark resources including database and + container information to the filesystem cache. + Args: + benchmark: Name of the benchmark to cache + """ self.cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def cosmos_client(self) -> CosmosClient: + """Get or create CosmosDB client. + + Initializes the CosmosDB client using the account credentials. + The client is cached after first initialization. + Returns: + CosmosClient instance for database operations. + """ if self._cosmos_client is None: self._cosmosdb_account = cast(AzureResources, self._cloud_resources).cosmosdb_account( @@ -146,13 +229,39 @@ def cosmos_client(self) -> CosmosClient: return self._cosmos_client def has_tables(self, benchmark: str) -> bool: + """Check if benchmark has allocated tables. + + Args: + benchmark: Name of the benchmark to check + + Returns: + True if benchmark has allocated resources, False otherwise. + """ return benchmark in self._benchmark_resources def benchmark_database(self, benchmark: str) -> str: + """Get database name for benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Name of the CosmosDB database for the benchmark. + + Raises: + KeyError: If benchmark resources are not allocated. 
+ """ return self._benchmark_resources[benchmark].database def credentials(self) -> Tuple[str, str, str]: + """Get CosmosDB account credentials. + + Retrieves the account name, URL, and credential for CosmosDB access. + Initializes the CosmosDB account if not already done. + Returns: + Tuple containing (account_name, url, credential) for CosmosDB access. + """ # An update of function that uses fully cached data will have # to initialize it separately # There were no prior actions that initialized this variable @@ -174,7 +283,22 @@ def write_to_table( data: dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): + ) -> None: + """Write data to CosmosDB container. + + Inserts data into the specified container with required key fields. + CosmosDB requires both a partition key and an 'id' field for documents. + + Args: + benchmark: Name of the benchmark + table: Name of the container/table + data: Dictionary data to insert + primary_key: Tuple of (key_name, key_value) for partition key + secondary_key: Tuple of (key_name, key_value) for document id + + Raises: + AssertionError: If table name cannot be resolved or secondary_key is None. + """ res = self._benchmark_resources[benchmark] table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -194,16 +318,32 @@ def write_to_table( def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: + """Create CosmosDB container for benchmark table. + + Creates a new CosmosDB database and container for the benchmark if they + don't exist. Each benchmark gets its own database, and containers are + created within that database for each table. + + Args: + benchmark: Name of the benchmark + name: Name of the container/table to create + primary_key: Partition key field name for the container + _: Unused parameter for compatibility with base class + + Returns: + Name of the created container. + Raises: + CosmosResourceNotFoundError: If database or container operations fail. + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) if benchmark_resources is not None and name in benchmark_resources.containers: self.logging.info(f"Using cached CosmosDB container {name}") - """ - For some reason, creating the client is enough to verify existence of db/container. - We need to force the client to make some actions; that's why we call read. - """ + # For some reason, creating the client is enough to verify existence of db/container. + # We need to force the client to make some actions; that's why we call read. + # Each benchmark receives its own CosmosDB database if benchmark_resources is None: @@ -228,7 +368,6 @@ def create_table( ) try: - # verify it exists benchmark_resources.database_client.get_container_client(name).read() self.logging.info(f"Using existing CosmosDB container {name}") @@ -245,7 +384,29 @@ def create_table( return name def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + Name of the cleared table. + + Raises: + NotImplementedError: This operation is not yet implemented. + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + Name of the removed table. + + Raises: + NotImplementedError: This operation is not yet implemented. 
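The container model described above (one database per benchmark, a per-container partition key, and a mandatory `id` field on every document) corresponds roughly to the following `azure-cosmos` SDK calls. This is an illustrative sketch rather than the SeBS implementation; `url`, `credential`, and the database/container names are placeholders:

    from azure.cosmos import CosmosClient, PartitionKey

    client = CosmosClient(url, credential=credential)
    # Each benchmark receives its own database.
    database = client.create_database_if_not_exists(id="some-benchmark")
    # The container name matches the benchmark table name; the partition key is set per container.
    container = database.create_container_if_not_exists(
        id="results", partition_key=PartitionKey(path="/req_id")
    )
    # Documents need both the partition key field and an explicit 'id'.
    container.upsert_item({"id": "0001", "req_id": "run-1", "payload": "..."})
    # Reading metadata forces the lazy client to actually verify that the container exists.
    container.read()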
+ """ raise NotImplementedError() diff --git a/sebs/azure/system_resources.py b/sebs/azure/system_resources.py index 0e3494d1..960d5185 100644 --- a/sebs/azure/system_resources.py +++ b/sebs/azure/system_resources.py @@ -1,25 +1,57 @@ +"""Azure system resources management for SeBS. + +This module provides Azure-specific system resource management including +storage accounts, CosmosDB instances, and Azure CLI management for +serverless benchmark execution. +""" + import json -from typing import cast, Optional +from typing import Optional, cast + +import docker -from sebs.config import SeBSConfig -from sebs.azure.config import AzureConfig from sebs.azure.blob_storage import BlobStorage -from sebs.azure.cosmosdb import CosmosDB from sebs.azure.cli import AzureCLI +from sebs.azure.config import AzureConfig +from sebs.azure.cosmosdb import CosmosDB from sebs.cache import Cache +from sebs.config import SeBSConfig from sebs.faas.resources import SystemResources from sebs.utils import LoggingHandlers -import docker - class AzureSystemResources(SystemResources): + """Azure system resources manager for SeBS benchmarking. + + Manages Azure-specific system resources including Blob Storage, + CosmosDB for NoSQL operations, and Azure CLI for resource management. + Handles authentication, resource initialization, and lifecycle management. + + Attributes: + _logging_handlers (LoggingHandlers): Logging configuration handlers + _storage (Optional[BlobStorage]): Azure Blob Storage instance + _nosql_storage (Optional[CosmosDB]): Azure CosmosDB instance + _cli_instance (Optional[AzureCLI]): Azure CLI Docker container instance + _system_config (SeBSConfig): SeBS system configuration + _cli_instance_stop (bool): Flag to control CLI instance lifecycle + """ + @staticmethod def typename() -> str: + """Get the system resources type name. + + Returns: + str: Type identifier for Azure system resources. + """ return "Azure.SystemResources" @property def config(self) -> AzureConfig: + """Get the Azure configuration. + + Returns: + AzureConfig: Azure-specific configuration instance. + """ return cast(AzureConfig, self._config) def __init__( @@ -27,9 +59,18 @@ def __init__( system_config: SeBSConfig, config: AzureConfig, cache_client: Cache, - docker_client: docker.client, + docker_client: docker.client.DockerClient, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize Azure system resources. + + Args: + system_config (SeBSConfig): SeBS system configuration + config (AzureConfig): Azure-specific configuration + cache_client (Cache): Cache for storing resource information + docker_client (docker.client.DockerClient): Docker client for container management + logger_handlers (LoggingHandlers): Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers @@ -37,19 +78,24 @@ def __init__( self._nosql_storage: Optional[CosmosDB] = None self._cli_instance: Optional[AzureCLI] = None self._system_config = system_config + self._cli_instance_stop: bool = True - """ - Create wrapper object for Azure blob storage. - First ensure that storage account is created and connection string - is known. Then, create wrapper and create request number of buckets. + def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + """Get or create Azure Blob Storage instance. + + Creates wrapper object for Azure blob storage with proper authentication. 
+ First ensures that storage account is created and connection string + is known, then creates wrapper and creates requested number of buckets. Requires Azure CLI instance in Docker to obtain storage account details. - :param replace_existing: when true, replace existing files in input buckets - :return: Azure storage instance - """ + Args: + replace_existing (Optional[bool]): When True, replace existing files in input buckets. + If None, defaults to False. - def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + Returns: + BlobStorage: Azure Blob Storage instance for benchmark data management. + """ if self._storage is None: self._storage = BlobStorage( self.config.region, @@ -64,14 +110,30 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: return self._storage def get_nosql_storage(self) -> CosmosDB: + """Get or create Azure CosmosDB instance. + + Creates and configures CosmosDB instance for NoSQL benchmark operations. + Handles authentication and database/container creation as needed. + + Returns: + CosmosDB: Azure CosmosDB instance for NoSQL operations. + """ if self._nosql_storage is None: self._nosql_storage = CosmosDB( self.cli_instance, self._cache_client, self.config.resources, self.config.region ) return self._nosql_storage - def _login_cli(self): + def _login_cli(self) -> None: + """Login to Azure CLI using service principal credentials. + Authenticates with Azure using the configured service principal + credentials and validates subscription access. + + Raises: + RuntimeError: If no valid subscription is found or multiple subscriptions exist. + AssertionError: If CLI instance is not initialized. + """ assert self._cli_instance is not None output = self._cli_instance.login( @@ -90,7 +152,14 @@ def _login_cli(self): @property def cli_instance(self) -> AzureCLI: + """Get or create Azure CLI instance. + + Creates and initializes Azure CLI Docker container if not already created. + Handles authentication automatically on first access. + Returns: + AzureCLI: Azure CLI instance for executing Azure commands. + """ if self._cli_instance is None: self._cli_instance = AzureCLI(self._system_config, self._docker_client) self._cli_instance_stop = True @@ -99,7 +168,17 @@ def cli_instance(self) -> AzureCLI: return self._cli_instance - def initialize_cli(self, cli: AzureCLI, login: bool = False): + def initialize_cli(self, cli: AzureCLI, login: bool = False) -> None: + """Initialize with existing Azure CLI instance. + + Allows using an external Azure CLI instance instead of creating a new one. + Useful for sharing CLI instances across multiple resource managers. + + Args: + cli (AzureCLI): External Azure CLI instance to use + login (bool): Whether to perform login with this CLI instance. + Defaults to False. + """ self._cli_instance = cli self._cli_instance_stop = False @@ -107,5 +186,10 @@ def initialize_cli(self, cli: AzureCLI, login: bool = False): self._login_cli() def shutdown(self) -> None: + """Shutdown Azure system resources. + + Cleans up Azure CLI Docker container and other resources. + Only shuts down CLI if it was created by this instance. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 9757c0a5..20218365 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -149,15 +149,21 @@ class Benchmark(LoggingBase): Creates code package representing a benchmark with all code and assets. 
This class handles building, packaging, and deploying benchmark code for - serverless platforms. It manages dependencies installation within Docker - images corresponding to the target cloud deployment. - - The behavior of the class depends on cache state: - - 1. If there's no cache entry, a code package is built - 2. Otherwise, the hash of the entire benchmark is computed and compared - with the cached value. If changed, it rebuilds the benchmark - 3. Otherwise, it returns the path to cached code + serverless platforms. + This includes copying source files, adding deployment-specific wrappers, + adding deployment-specific dependencies, and installing application dependencies + within Docker images corresponding to the target cloud deployment. + Code packages are cached. + + The behavior of this class, particularly the `build` method, depends on the + state of the SeBS cache: + + 1. If no cache entry exists for the benchmark (for the current language, deployment, etc.), + a new code package is built. + 2. If a cache entry exists, the hash of the benchmark's source directory is computed + and compared with the hash of cached package. If they differ, or if an update is forced, + the package is rebuilt. + 3. Otherwise (cache entry exists and hash matches), the cached code package is used. Attributes: benchmark: Name of the benchmark @@ -227,7 +233,8 @@ def benchmark_config(self) -> BenchmarkConfig: @property def code_package(self) -> Dict[str, Any]: """ - Get the code package information. + Get the cached code package information, if available. + This typically includes 'location' (relative to cache_dir), 'hash', and 'size'. Returns: Dict[str, Any]: Dictionary with code package information @@ -238,7 +245,8 @@ def code_package(self) -> Dict[str, Any]: @property def functions(self) -> Dict[str, Any]: """ - Get the functions for this benchmark. + Get the cached information about deployed functions associated + with this benchmark for the current deployment, keyed by function name. Returns: Dict[str, Any]: Dictionary of functions @@ -249,7 +257,9 @@ def functions(self) -> Dict[str, Any]: @property def code_location(self) -> str: """ - Get the location of the code package. + Get the absolute path to the prepared code package. + If cached, it points to the location within the SeBS cache directory. + Otherwise, it points to the build output directory. Returns: str: Path to the code package @@ -283,7 +293,8 @@ def is_cached(self, val: bool): @property def is_cached_valid(self) -> bool: """ - Check if the cached benchmark is valid. + True if a cached code package exists and its hash matches the current + benchmark source code hash. Returns: bool: True if valid, False otherwise @@ -337,7 +348,7 @@ def language(self) -> "Language": @property def language_name(self) -> str: """ - Get the name of the programming language. + Get the name of the programming language, e.g., "python". Returns: str: Name of the language @@ -347,7 +358,7 @@ def language_name(self) -> str: @property def language_version(self) -> str: """ - Get the version of the programming language. + Get the version of the programming language, e.g. "3.8". 
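Returning to the cache validation described at the top of this class docstring: the hash comparison in step 2 can be pictured with a small, self-contained sketch. This is not the exact SeBS routine; `benchmark_dir` and `cached_entry` are placeholders:

    import hashlib
    import os

    def directory_hash(path: str) -> str:
        # Hash file names and contents in a stable order so that any source
        # change produces a different digest.
        digest = hashlib.md5()
        for root, _, files in sorted(os.walk(path)):
            for name in sorted(files):
                full_path = os.path.join(root, name)
                digest.update(full_path.encode())
                with open(full_path, "rb") as f:
                    digest.update(f.read())
        return digest.hexdigest()

    rebuild = cached_entry is None or cached_entry["hash"] != directory_hash(benchmark_dir)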
Returns: str: Version of the language @@ -468,9 +479,12 @@ def __init__( self._language_version = config.runtime.version self._architecture = self._experiment_config.architecture self._container_deployment = config.container_deployment - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") - if not self._benchmark_path: + + benchmark_path = find_benchmark(self.benchmark, "benchmarks") + if not benchmark_path: raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) + self._benchmark_path = benchmark_path + with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) @@ -600,7 +614,16 @@ def query_cache(self) -> None: self._is_cached = False self._is_cached_valid = False - def copy_code(self, output_dir): + def copy_code(self, output_dir: str) -> None: + """Copy benchmark source code to output directory. + + Copies language-specific source files and dependency files from the + benchmark directory to the output directory for deployment preparation. + Handles both Python requirements files and Node.js package.json files. + + Args: + output_dir: Destination directory for copied files + """ FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], @@ -614,7 +637,16 @@ def copy_code(self, output_dir): if os.path.exists(nodejs_package_json): shutil.copy2(nodejs_package_json, os.path.join(output_dir, "package.json")) - def add_benchmark_data(self, output_dir): + def add_benchmark_data(self, output_dir: str) -> None: + """Add benchmark-specific data and assets to output directory. + + Executes benchmark initialization scripts (init.sh) if present in + the benchmark directory. These scripts typically download or generate + additional data files required by the benchmark. + + Args: + output_dir: Directory where benchmark data should be added + """ cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false {architecture}" paths = [ self.benchmark_path, @@ -633,7 +665,16 @@ def add_benchmark_data(self, output_dir): stderr=subprocess.STDOUT, ) - def add_deployment_files(self, output_dir): + def add_deployment_files(self, output_dir: str) -> None: + """Add deployment-specific wrapper files to output directory. + + Copies platform-specific wrapper files (handlers, adapters) that + integrate the benchmark code with the target FaaS platform's + execution environment. + + Args: + output_dir: Directory where deployment files should be added + """ handlers_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name ) @@ -646,7 +687,15 @@ def add_deployment_files(self, output_dir): for file in handlers: shutil.copy2(file, os.path.join(output_dir)) - def add_deployment_package_python(self, output_dir): + def add_deployment_package_python(self, output_dir: str) -> None: + """Add Python deployment packages to requirements file. + + Appends platform-specific Python packages and benchmark module + dependencies to the requirements.txt file for the deployment. 
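Stepping back to `copy_code` above: the glob-based copy it describes can be sketched as follows, where `benchmark_src` and `output_dir` are placeholder paths for the language-specific benchmark sources and the build output:

    import glob
    import os
    import shutil

    FILES = {"python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"]}
    for pattern in FILES["python"]:
        for path in glob.glob(os.path.join(benchmark_src, pattern)):
            shutil.copy2(path, output_dir)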
+ + Args: + output_dir: Directory containing the requirements file to modify + """ destination_file = f"requirements.txt.{self._language_version}" if not os.path.exists(os.path.join(output_dir, destination_file)): @@ -669,7 +718,15 @@ def add_deployment_package_python(self, output_dir): for package in module_packages[bench_module.value]: out.write(package) - def add_deployment_package_nodejs(self, output_dir): + def add_deployment_package_nodejs(self, output_dir: str) -> None: + """Add Node.js deployment packages to package.json. + + Modifies the package.json file to include platform-specific + Node.js dependencies required for deployment. + + Args: + output_dir: Directory containing the package.json file to modify + """ # modify package.json packages = self._system_config.deployment_packages( self._deployment_name, self.language_name @@ -687,7 +744,18 @@ def add_deployment_package_nodejs(self, output_dir): with open(package_config, "w") as package_file: json.dump(package_json, package_file, indent=2) - def add_deployment_package(self, output_dir): + def add_deployment_package(self, output_dir: str) -> None: + """Add deployment packages based on programming language. + + Delegates to language-specific package addition methods to include + platform-specific dependencies in the deployment package. + + Args: + output_dir: Directory where deployment packages should be added + + Raises: + NotImplementedError: If the language is not supported + """ from sebs.faas.function import Language if self.language == Language.PYTHON: @@ -698,14 +766,39 @@ def add_deployment_package(self, output_dir): raise NotImplementedError @staticmethod - def directory_size(directory: str): + def directory_size(directory: str) -> int: + """Calculate total size of all files in a directory. + + Recursively calculates the total size in bytes of all files + within the specified directory and its subdirectories. + + Args: + directory: Path to the directory to measure + + Returns: + int: Total size in bytes of all files in the directory + """ from pathlib import Path root = Path(directory) sizes = [f.stat().st_size for f in root.glob("**/*") if f.is_file()] return sum(sizes) - def install_dependencies(self, output_dir): + def install_dependencies(self, output_dir: str) -> None: + """Install benchmark dependencies using Docker. + + Uses Docker containers to install language-specific dependencies + (pip packages for Python, npm packages for Node.js) in an environment + matching the target deployment platform. Handles both volume mounting + and file copying approaches for Docker compatibility. + + Args: + output_dir: Directory containing the code package to build + + Raises: + RuntimeError: If Docker image pull fails + docker.errors.ContainerError: If dependency installation fails + """ # do we have docker image for this run and language? 
if "build" not in self._system_config.docker_image_types( self._deployment_name, self.language_name @@ -837,9 +930,9 @@ def ensure_image(name: str) -> None: ) # copy updated code with package data, stat = container.get_archive("/mnt/function") - with open(tar_archive, "wb") as f: + with open(tar_archive, "wb") as output_filef: for chunk in data: - f.write(chunk) + output_filef.write(chunk) with tarfile.open(tar_archive, "r") as tar: tar.extractall(output_dir) # docker packs the entire directory with basename function @@ -862,7 +955,15 @@ def ensure_image(name: str) -> None: self.logging.error(f"Docker mount volumes: {volumes}") raise e - def recalculate_code_size(self): + def recalculate_code_size(self) -> int: + """Recalculate and update the code package size. + + Measures the current size of the output directory and updates + the internal code size tracking. + + Returns: + int: Updated code package size in bytes + """ self._code_size = Benchmark.directory_size(self._output_dir) return self._code_size @@ -872,6 +973,27 @@ def build( [str, str, str, str, str, bool, bool], Tuple[str, int, str] ], ) -> Tuple[bool, str, bool, str]: + """Build the complete benchmark deployment package. + + Orchestrates the entire build process for a benchmark, including: + - Cache validation and reuse if possible + - Code copying and dependency installation + - Platform-specific build steps + - Cache updates after successful build + + Args: + deployment_build_step: Platform-specific build function that takes + (output_dir, language, version, architecture, benchmark_name, + is_cached_valid, container_deployment) and returns + (code_location, code_size, container_uri) + + Returns: + Tuple containing: + - bool: Whether a new build was performed (False if cached) + - str: Path to the built code package + - bool: Whether this is a container deployment + - str: Container URI (empty string if not container deployment) + """ # Skip build if files are up to date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: @@ -944,7 +1066,21 @@ def build( def prepare_input( self, system_resources: SystemResources, size: str, replace_existing: bool = False - ): + ) -> Dict[str, str]: + """Prepare benchmark input data and allocate cloud resources. + + Handles the setup of cloud storage buckets and NoSQL databases + required by the benchmark. Generates benchmark-specific input data + and uploads it to the appropriate cloud storage systems. + + Args: + system_resources: Cloud system resources manager + size: Benchmark workload size ('small', 'medium', 'large') + replace_existing: Whether to replace existing input data + + Returns: + Dict[str, str]: Input configuration for the benchmark function + """ """ Handle object storage buckets. @@ -1019,7 +1155,19 @@ def prepare_input( This step allows to modify code package without going through the entire pipeline. """ - def code_package_modify(self, filename: str, data: bytes): + def code_package_modify(self, filename: str, data: bytes) -> None: + """Modify a file within the deployed code package. + + Updates a specific file within the code package without rebuilding + the entire package. Currently only supports ZIP archive packages. 
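The Docker-based dependency installation described for `install_dependencies` earlier boils down to pulling a build image and running an installer script with the code directory mounted. A minimal docker-py sketch, in which the image name and `output_dir` are placeholders rather than the actual SeBS image naming scheme:

    import docker

    client = docker.from_env()
    image = "sebs-build-image:python-3.8"  # placeholder for the deployment/language/version build image
    client.images.pull(image)
    client.containers.run(
        image,
        command="/bin/bash /sebs/installer.sh",
        volumes={output_dir: {"bind": "/mnt/function", "mode": "rw"}},
        remove=True,
    )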
+ + Args: + filename: Name of the file to modify within the package + data: New content for the file as bytes + + Raises: + NotImplementedError: If the code package is not a ZIP archive + """ if self.code_package_is_archive(): self._update_zip(self.code_location, filename, data) @@ -1034,19 +1182,45 @@ def code_package_modify(self, filename: str, data: bytes): """ def code_package_is_archive(self) -> bool: + """Check if the code package is an archive file. + + Determines whether the code package is stored as an archive file + (ZIP) rather than a directory structure. + + Returns: + bool: True if package is a ZIP archive, False if it's a directory + """ if os.path.isfile(self.code_location): extension = os.path.splitext(self.code_location)[1] return extension in [".zip"] return False def code_package_recompute_size(self) -> float: + """Recalculate the size of the code package file. + + Updates the internal size tracking after modifications to the + code package file. + + Returns: + float: Updated package size in bytes + """ bytes_size = os.path.getsize(self.code_location) self._code_size = bytes_size return bytes_size # https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python @staticmethod - def _update_zip(zipname: str, filename: str, data: bytes): + def _update_zip(zipname: str, filename: str, data: bytes) -> None: + """Update a file within a ZIP archive. + + Replaces the content of a specific file within a ZIP archive + while preserving all other files and archive metadata. + + Args: + zipname: Path to the ZIP archive to modify + filename: Name of the file to update within the archive + data: New content for the file as bytes + """ import zipfile import tempfile @@ -1078,14 +1252,41 @@ def _update_zip(zipname: str, filename: str, data: bytes): class BenchmarkModuleInterface: + """Interface definition for benchmark input modules. + + This abstract class defines the interface that benchmark input modules + must implement to provide input data generation, storage allocation, + and NoSQL database setup for benchmarks. + + All methods are static as they operate on benchmark data rather than + instance state. Benchmark modules are dynamically loaded from the + input.py file in each benchmark directory. + """ + @staticmethod @abstractmethod def buckets_count() -> Tuple[int, int]: + """Get the number of storage buckets required by the benchmark. + + Returns: + Tuple[int, int]: Number of (input_buckets, output_buckets) needed + """ pass @staticmethod @abstractmethod - def allocate_nosql() -> dict: + def allocate_nosql() -> Dict[str, Dict[str, str]]: + """Define NoSQL table requirements for the benchmark. + + Returns: + Dict containing table definitions with primary and secondary keys: + { + 'table_name': { + 'primary_key': 'key_field_name', + 'secondary_key': 'optional_secondary_key_name' + } + } + """ pass @staticmethod @@ -1101,10 +1302,43 @@ def generate_input( Callable[[str, str, dict, Tuple[str, str], Optional[Tuple[str, str]]], None] ], ) -> Dict[str, str]: + """Generate benchmark input data and configuration. + + Creates the input data files and configuration needed for benchmark + execution, uploading data to cloud storage and NoSQL databases as needed. 
+ + Args: + data_dir: Directory containing benchmark data files + size: Benchmark workload size ('small', 'medium', 'large') + benchmarks_bucket: Name of the cloud storage bucket for data + input_paths: List of input data paths in cloud storage + output_paths: List of output data paths in cloud storage + upload_func: Function for uploading files to cloud storage + nosql_func: Function for writing data to NoSQL databases + + Returns: + Dict[str, str]: Input configuration dictionary for the benchmark + """ pass def load_benchmark_input(benchmark_path: str) -> BenchmarkModuleInterface: + """Dynamically load the input module for a benchmark. + + Loads the input.py file from the benchmark directory and returns it + as a module interface for generating benchmark input data. + + Args: + benchmark_path: Path to the benchmark directory containing input.py + + Returns: + BenchmarkModuleInterface: Loaded input module with benchmark-specific + input generation functions + + Raises: + FileNotFoundError: If input.py is not found in the benchmark directory + ImportError: If the input module cannot be loaded + """ # Look for input generator file in the directory containing benchmark import importlib.machinery import importlib.util diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 19c7d3ab..9da60d86 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -1,3 +1,31 @@ +"""Configuration management for Function-as-a-Service (FaaS) systems. + +This module provides abstract base classes for managing configurations across +different FaaS platforms (AWS Lambda, Azure Functions, Google Cloud Functions, +OpenWhisk, etc.). It defines the core interfaces for: + +- Credentials management and authentication +- Resource allocation and management +- Platform-specific configuration settings +- Configuration serialization and caching + +The module follows a hierarchical structure where each platform implements these +abstract classes with their specific authentication methods, resource types, +and configuration parameters. All configurations support caching to avoid +repeated initialization and provide persistence across benchmark runs. + +Classes: + Credentials: Abstract base for platform authentication credentials + Resources: Abstract base for cloud resource management + Config: Abstract base for complete platform configuration + +The credentials initialization follows this precedence order: +1. Load credentials from cache +2. Override with any new values provided in config +3. Fall back to environment variables +4. Report failure if no credentials are available +""" + from __future__ import annotations from abc import ABC @@ -8,66 +36,122 @@ from sebs.cache import Cache from sebs.utils import has_platform, LoggingBase, LoggingHandlers -# FIXME: Replace type hints for static generators after migration to 3.7 -# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel - -""" - Credentials for FaaS system used to authorize operations on functions - and other resources. - - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. -""" - class Credentials(ABC, LoggingBase): + """Abstract base class for FaaS platform authentication credentials. 
+ + This class defines the interface for managing authentication credentials + across different FaaS platforms. Each platform implementation provides + specific credential types (API keys, service account files, connection + strings, etc.) while following the common serialization and caching + patterns defined here. + + Platform implementations must handle: + - Loading credentials from user configuration + - Fallback to environment variables + - Secure storage in cache + - Credential validation and refresh + """ + def __init__(self): + """Initialize the credentials base class with logging support.""" super().__init__() - """ - Create credentials instance from user config and cached values. - """ - @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Credentials": - pass + """Create credentials instance from user config and cached values. - """ - Serialize to JSON for storage in cache. - """ + This method implements the credential loading hierarchy: + 1. Use new config values if provided + 2. Fall back to cached credentials + 3. Load from environment variables + 4. Fail if no credentials available - @abstractmethod - def serialize(self) -> dict: + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored credentials + handlers: Logging handlers for error reporting + + Returns: + Credentials: Platform-specific credentials instance + + Raises: + RuntimeError: If no valid credentials can be loaded + """ pass + @abstractmethod + def serialize(self) -> dict: + """Serialize credentials to dictionary for cache storage. -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. Examples might include IAM roles and API gateways - for HTTP triggers. + Returns: + dict: Serialized credential data suitable for JSON storage - Storage resources are handled seperately. -""" + Note: + Implementations should be careful about storing sensitive + information and may choose to exclude certain fields. + """ + pass class Resources(ABC, LoggingBase): + """Abstract base class for FaaS platform resource management. + + This class manages cloud resources allocated for function execution and + deployment across different FaaS platforms. Resources include infrastructure + components like IAM roles, API gateways, networking components, and storage + buckets needed to support serverless function deployment and execution. + + Storage resources (object storage, NoSQL databases) are handled separately + through dedicated storage classes, while this class focuses on compute + and deployment infrastructure. + + Key responsibilities: + - Resource ID management and generation + - Storage bucket lifecycle management + - Platform-specific resource provisioning + - Resource serialization and caching + - Resource cleanup and deallocation + """ + class StorageBucketType(str, Enum): + """Enumeration of storage bucket types used by SeBS. + + Different bucket types serve different purposes in the benchmarking workflow: + - DEPLOYMENT: Stores function deployment packages (ZIP files, containers) + - BENCHMARKS: Stores benchmark input data and test files + - EXPERIMENTS: Stores experiment results and output data + """ + DEPLOYMENT = "deployment" BENCHMARKS = "benchmarks" EXPERIMENTS = "experiments" @staticmethod - def deserialize(val: str) -> Resources.StorageBucketType: + def deserialize(val: str) -> "Resources.StorageBucketType": + """Deserialize a string value to a StorageBucketType enum. 
+ + Args: + val: String value to convert to enum + + Returns: + StorageBucketType: Corresponding enum value + + Raises: + Exception: If the value doesn't match any enum member + """ for member in Resources.StorageBucketType: if member.value == val: return member raise Exception(f"Unknown storage bucket type type {val}") def __init__(self, name: str): + """Initialize the resources base class. + + Args: + name: Platform name (e.g., 'aws', 'azure', 'gcp') + """ super().__init__() self._name = name self._buckets: Dict[Resources.StorageBucketType, str] = {} @@ -75,38 +159,99 @@ def __init__(self, name: str): @property def resources_id(self) -> str: + """Get the unique resource ID for this deployment. + + Returns: + str: Unique resource identifier + + Raises: + AssertionError: If no resource ID has been set + """ assert self._resources_id is not None return self._resources_id @resources_id.setter def resources_id(self, resources_id: str): + """Set the unique resource ID for this deployment. + + Args: + resources_id: Unique identifier for resource grouping + """ self._resources_id = resources_id @property def has_resources_id(self) -> bool: + """Check if a resource ID has been assigned. + + Returns: + bool: True if resource ID is set, False otherwise + """ return self._resources_id is not None @property def region(self) -> str: + """Get the cloud region for resource deployment. + + Returns: + str: Cloud region identifier + """ return self._region @region.setter def region(self, region: str): + """Set the cloud region for resource deployment. + + Args: + region: Cloud region identifier + """ self._region = region def get_storage_bucket(self, bucket_type: Resources.StorageBucketType) -> Optional[str]: + """Get the bucket name for a specific bucket type. + + Args: + bucket_type: Type of bucket to retrieve + + Returns: + Optional[str]: Bucket name if set, None otherwise + """ return self._buckets.get(bucket_type) def get_storage_bucket_name(self, bucket_type: Resources.StorageBucketType) -> str: + """Generate a standardized bucket name for a bucket type. + + Creates bucket names following the pattern: sebs-{type}-{resource_id} + + Args: + bucket_type: Type of bucket to name + + Returns: + str: Generated bucket name + """ return f"sebs-{bucket_type.value}-{self._resources_id}" def set_storage_bucket(self, bucket_type: Resources.StorageBucketType, bucket_name: str): + """Set the bucket name for a specific bucket type. + + Args: + bucket_type: Type of bucket to set + bucket_name: Name of the bucket + """ self._buckets[bucket_type] = bucket_name @staticmethod @abstractmethod - def initialize(res: Resources, dct: dict): + def initialize(res: "Resources", dct: dict): + """Initialize a Resources instance from configuration dictionary. + This base implementation handles common resource initialization + including resource ID and storage bucket configuration. Platform-specific + implementations should call this method and add their own initialization. + + Args: + res: Resources instance to initialize + dct: Configuration dictionary from cache or user config + """ if "resources_id" in dct: res._resources_id = dct["resources_id"] @@ -114,21 +259,28 @@ def initialize(res: Resources, dct: dict): for key, value in dct["storage_buckets"].items(): res._buckets[Resources.StorageBucketType.deserialize(key)] = value - """ - Create credentials instance from user config and cached values. 
- """ - @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": - pass + """Create resources instance from user config and cached values. - """ - Serialize to JSON for storage in cache. - """ + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored resources + handlers: Logging handlers for error reporting + + Returns: + Resources: Platform-specific resources instance + """ + pass @abstractmethod def serialize(self) -> dict: + """Serialize resources to dictionary for cache storage. + + Returns: + dict: Serialized resource data including resource ID and bucket mappings + """ out = {} if self.has_resources_id: out["resources_id"] = self.resources_id @@ -137,6 +289,14 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): + """Update the cache with current resource configuration. + + Stores the resource ID and storage bucket mappings in the cache + for future retrieval. + + Args: + cache: Cache instance to update + """ if self.has_resources_id: cache.update_config( val=self.resources_id, keys=[self._name, "resources", "resources_id"] @@ -147,43 +307,94 @@ def update_cache(self, cache: Cache): ) -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" - - class Config(ABC, LoggingBase): + """Abstract base class for complete FaaS platform configuration. + + This class combines credentials and resources into a complete platform + configuration, along with platform-specific settings like region selection. + It provides the top-level configuration interface used throughout the + benchmarking framework. + + The Config class coordinates: + - Platform credentials for authentication + - Resource allocation and management + - Regional deployment settings + - Configuration persistence and caching + - Platform-specific parameter handling + """ _region: str def __init__(self, name: str): + """Initialize the configuration base class. + + Args: + name: Platform name (e.g., 'aws', 'azure', 'gcp') + """ super().__init__() self._region = "" self._name = name @property def region(self) -> str: + """Get the cloud region for deployment. + + Returns: + str: Cloud region identifier + """ return self._region @property @abstractmethod def credentials(self) -> Credentials: + """Get the platform credentials. + + Returns: + Credentials: Platform-specific credentials instance + """ pass @property @abstractmethod def resources(self) -> Resources: + """Get the platform resources. + + Returns: + Resources: Platform-specific resources instance + """ pass @staticmethod @abstractmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: "Config", dct: dict): + """Initialize a Config instance from configuration dictionary. + + Args: + cfg: Config instance to initialize + dct: Configuration dictionary + """ cfg._region = dct["region"] @staticmethod @abstractmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + """Create configuration instance from user config and cached values. + + This method serves as a factory for platform-specific configurations, + dynamically loading the appropriate implementation based on the platform + name specified in the configuration. 
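The factory behaviour described for `Config.deserialize` amounts to mapping the `name` field onto a platform-specific implementation. A sketch of that dispatch, where the AWS class name is an assumption used only for illustration:

    name = config["name"]
    implementations = {"local": LocalConfig}
    if has_platform("aws"):
        from sebs.aws.config import AWSConfig  # assumed class name
        implementations["aws"] = AWSConfig
    assert name in implementations, f"Unknown platform {name}"
    platform_config = implementations[name].deserialize(config, cache, handlers)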
+ + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored configuration + handlers: Logging handlers for error reporting + + Returns: + Config: Platform-specific configuration instance + + Raises: + AssertionError: If the platform type is unknown or unsupported + """ from sebs.local.config import LocalConfig name = config["name"] @@ -210,8 +421,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config @abstractmethod def serialize(self) -> dict: + """Serialize configuration to dictionary for cache storage. + + Returns: + dict: Serialized configuration including platform name and region + """ return {"name": self._name, "region": self._region} @abstractmethod def update_cache(self, cache: Cache): + """Update the cache with current configuration settings. + + Args: + cache: Cache instance to update + """ cache.update_config(val=self.region, keys=[self._name, "region"]) diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7..3894f2bc 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -1,3 +1,19 @@ +"""Docker container management for serverless function deployments. + +This module provides the DockerContainer class for building and managing +Docker containers for serverless function deployments. It handles: + +- Building benchmark Docker images for different platforms +- Cross-architecture container compilation with emulation +- Container registry operations (push/pull) +- Progress tracking for container operations +- Platform-specific container naming and tagging + +The module supports container-based deployments across different serverless +platforms, with automatic detection of the host architecture and appropriate +configuration for cross-compilation when needed. +""" + from abc import abstractmethod import docker import json @@ -13,17 +29,53 @@ class DockerContainer(LoggingBase): + """Abstract base class for Docker container management in serverless deployments. + + This class provides common functionality for building, pushing, and managing + Docker containers for serverless function deployments. Each platform + implementation (AWS, Azure, GCP, etc.) extends this class to provide + platform-specific container handling. + + Key features: + - Container image building with cross-architecture support + - Container registry operations (push/pull/inspect) + - Progress tracking for long-running operations + - Platform-specific image naming and tagging + - Caching and optimization for repeated builds + + Attributes: + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest inspection + system_config: SeBS configuration for image management + _disable_rich_output: Flag to disable rich progress output + """ + @staticmethod @abstractmethod def name() -> str: + """Get the platform name for this container implementation. + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp') + """ pass @property def disable_rich_output(self) -> bool: + """Get whether rich output is disabled. + + Returns: + bool: True if rich output is disabled, False otherwise + """ return self._disable_rich_output @disable_rich_output.setter def disable_rich_output(self, val: bool): + """Set whether to disable rich output. 
+ + Args: + val: True to disable rich output, False to enable + """ self._disable_rich_output = val def __init__( @@ -32,6 +84,13 @@ def __init__( docker_client, experimental_manifest: bool = False, ): + """Initialize the Docker container manager. + + Args: + system_config: SeBS configuration for container management + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest features + """ super().__init__() self.docker_client = docker_client @@ -40,7 +99,18 @@ def __init__( self._disable_rich_output = False def find_image(self, repository_name, image_tag) -> bool: + """Check if a Docker image exists in the registry. + + Attempts to find an image in the registry using either experimental + manifest inspection (if enabled) or by attempting to pull the image. + Args: + repository_name: Name of the repository (e.g., 'my-repo/my-image') + image_tag: Tag of the image to find + + Returns: + bool: True if the image exists, False otherwise + """ if self.experimental_manifest: try: # This requires enabling experimental Docker features @@ -58,7 +128,20 @@ def find_image(self, repository_name, image_tag) -> bool: return False def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): + """Update progress display for Docker operations. + + Parses Docker API output and updates the rich progress display for + operations like image pushing. Tracks individual layer progress and + handles completion events. + + Args: + txt: Docker API output line (JSON string or dict) + progress: Rich progress instance to update + layer_tasks: Dictionary tracking progress tasks for each layer + Raises: + Exception: If an error is reported in the Docker output + """ if isinstance(txt, str): line = json.loads(txt) else: @@ -89,6 +172,20 @@ def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): raise Exception(line["error"]) def push_image(self, repository_uri, image_tag): + """Push a Docker image to a container registry. + + Pushes the specified image to the container registry with optional + progress tracking. Handles errors and provides informative logging + throughout the process. + + Args: + repository_uri: URI of the container registry repository + image_tag: Tag of the image to push + + Raises: + docker.errors.APIError: If the push operation fails + RuntimeError: If an error occurs during the push stream + """ try: if not self.disable_rich_output: @@ -124,6 +221,20 @@ def push_image(self, repository_uri, image_tag): def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """Generate registry name and image URI for a benchmark. + + Creates platform-specific naming for container images including + registry URL, repository name, image tag, and complete image URI. + + Args: + benchmark: Name of the benchmark (e.g., '110.dynamic-html') + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x64', 'arm64') + + Returns: + Tuple[str, str, str, str]: Registry name, repository name, image tag, full image URI + """ pass def build_base_image( diff --git a/sebs/faas/resources.py b/sebs/faas/resources.py index 140a719e..ee590881 100644 --- a/sebs/faas/resources.py +++ b/sebs/faas/resources.py @@ -1,3 +1,20 @@ +"""System resource management for FaaS platforms. 
+ +This module provides the abstract base class for managing system-level resources +across different serverless platforms. It coordinates access to storage services, +NoSQL databases, and other cloud resources needed for benchmark execution. + +The SystemResources class serves as the resource factory and manager, handling: +- Storage service provisioning and access +- NoSQL database provisioning and access +- Resource lifecycle management +- Platform-specific resource configuration + +Each platform implementation (AWS, Azure, GCP, Local, etc.) provides concrete +implementations that handle platform-specific resource management while +following the common interface defined here. +""" + from abc import abstractmethod, ABC from typing import Optional @@ -11,33 +28,69 @@ class SystemResources(ABC, LoggingBase): + """Abstract base class for system-level resource management. + + This class provides a common interface for managing cloud resources needed + by benchmark functions across different serverless platforms. It handles the + provisioning and access to storage services, NoSQL databases, and other + platform-specific resources. + + The class serves as a factory and coordinator for different types of storage + and database services, ensuring they are properly configured and accessible + to benchmark functions during execution. + + Attributes: + _config: Platform configuration containing credentials and settings + _cache_client: Cache client for storing resource configurations + _docker_client: Docker client for container-based resource management + """ + def __init__(self, config: Config, cache_client: Cache, docker_client: docker.client): + """Initialize the system resources manager. + Args: + config: Platform configuration with credentials and settings + cache_client: Cache client for configuration persistence + docker_client: Docker client for container management + """ super().__init__() self._config = config self._cache_client = cache_client self._docker_client = docker_client - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. - - :param replace_existing: replace benchmark input data if exists already - """ - @abstractmethod def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: - pass + """Get or create a persistent storage instance. - """ - Access instance of NoSQL storage. - It might be a remote and truly persistent service (AWS DynamoDB, Azure CosmosDB..), - or a dynamically allocated local instance (ScyllaDB). + Provides access to object storage services (S3, Azure Blob, GCS, MinIO) + for storing benchmark input data, function packages, and results. The + storage instance may be a cloud service or a locally deployed container. - """ + Args: + replace_existing: Whether to replace existing benchmark data. + If None, uses the default behavior for the platform. + + Returns: + PersistentStorage: Configured storage instance ready for use + + Raises: + RuntimeError: If storage service cannot be provisioned or accessed + """ + pass @abstractmethod def get_nosql_storage(self) -> NoSQLStorage: + """Get or create a NoSQL database storage instance. + + Provides access to NoSQL database services (DynamoDB, CosmosDB, + Datastore, ScyllaDB) for benchmarks that require structured data + storage with key-value or document-based operations. 
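As a short usage note: callers obtain both storage backends through the same resource manager, regardless of the platform. `resources` below stands for any concrete SystemResources implementation:

    storage = resources.get_storage(replace_existing=False)
    nosql = resources.get_nosql_storage()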
+ + Returns: + NoSQLStorage: Configured NoSQL storage instance ready for use + + Raises: + RuntimeError: If NoSQL service cannot be provisioned or accessed + """ pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index dc4cc544..01c0a978 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -1,3 +1,20 @@ +"""Object storage abstraction for serverless benchmarks. + +This module provides the PersistentStorage abstract base class for managing +object storage across different cloud platforms and local deployments. It +handles bucket management, file operations, and benchmark data organization. + +The storage abstraction supports: +- Cross-platform object storage (S3, Azure Blob, GCS, MinIO) +- Benchmark data organization with input/output separation +- Bucket lifecycle management and naming conventions +- File upload/download operations with caching +- Deployment discovery and resource management + +Each platform provides concrete implementations that handle platform-specific +API calls while following the common interface defined here. +""" + import os import re @@ -9,36 +26,89 @@ from sebs.cache import Cache from sebs.utils import LoggingBase -""" - Abstract class -""" - class PersistentStorage(ABC, LoggingBase): + """Abstract base class for persistent object storage implementations. + + This class defines the interface for object storage services across different + cloud platforms. It manages buckets, files, and benchmark data organization + while providing a consistent API regardless of the underlying storage service. + + Key responsibilities: + - Bucket lifecycle management (create, list, delete) + - File operations (upload, download, list) + - Benchmark data organization with input/output separation + - Storage configuration caching and management + - Cross-platform deployment discovery + + Attributes: + cached: Whether bucket configuration is cached + _cache_client: Cache client for storing configuration + _input_prefixes: List of input data prefixes for benchmarks + _output_prefixes: List of output data prefixes for benchmarks + input_prefixes_files: Files associated with input prefixes + _replace_existing: Whether to replace existing files during uploads + _region: Cloud region for storage operations + _cloud_resources: Resource configuration for the platform + """ + @staticmethod @abstractmethod def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp', 'minio') + """ pass @property def cache_client(self) -> Cache: + """Get the cache client for configuration storage. + + Returns: + Cache: Cache client instance + """ return self._cache_client @property def replace_existing(self): + """Get whether to replace existing files during operations. + + Returns: + bool: True if existing files should be replaced, False otherwise + """ return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set whether to replace existing files during operations. + + Args: + val: True to replace existing files, False to skip + """ self._replace_existing = val @property def region(self): + """Get the cloud region for storage operations. + + Returns: + str: Cloud region identifier + """ return self._region def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool ): + """Initialize the persistent storage instance. 
+ + Args: + region: Cloud region for storage operations + cache_client: Cache client for configuration persistence + resources: Resource configuration for the platform + replace_existing: Whether to replace existing files during uploads + """ super().__init__() self._cache_client = cache_client self.cached = False @@ -51,18 +121,47 @@ def __init__( @property def input_prefixes(self) -> List[str]: + """Get the list of input data prefixes for benchmarks. + + Returns: + List[str]: List of input prefix names + """ return self._input_prefixes @property def output_prefixes(self) -> List[str]: + """Get the list of output data prefixes for benchmarks. + + Returns: + List[str]: List of output prefix names + """ return self._output_prefixes @abstractmethod def correct_name(self, name: str) -> str: + """Correct a bucket name to comply with platform naming requirements. + + Different platforms have different naming restrictions (character sets, + length limits, etc.). This method applies platform-specific corrections. + + Args: + name: Original bucket name + + Returns: + str: Corrected bucket name that complies with platform requirements + """ pass def find_deployments(self) -> List[str]: + """Find existing SeBS deployments by scanning bucket names. + Scans all buckets in the storage service and extracts deployment IDs + from bucket names that follow the SeBS naming convention. This helps + identify existing deployments that can be reused. + + Returns: + List[str]: List of deployment resource IDs found in bucket names + """ deployments = [] buckets = self.list_buckets() for bucket in buckets: From 646796369122e78b2c8c0f9255eb35dc48970717 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 25 Jun 2025 19:42:33 +0200 Subject: [PATCH 20/21] [docs] Updated and corrected docstrings --- sebs/benchmark.py | 52 ++++++++------ sebs/cache.py | 39 ++++++---- sebs/config.py | 9 ++- sebs/experiments/config.py | 5 +- sebs/experiments/environment.py | 3 +- sebs/experiments/eviction_model.py | 66 +++++++++-------- sebs/experiments/invocation_overhead.py | 26 +++++-- sebs/experiments/network_ping_pong.py | 23 +++--- sebs/experiments/startup_time.py | 95 ------------------------- sebs/faas/config.py | 29 ++++---- sebs/faas/container.py | 31 +++++--- 11 files changed, 170 insertions(+), 208 deletions(-) delete mode 100644 sebs/experiments/startup_time.py diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 20218365..0e295fee 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -672,6 +672,8 @@ def add_deployment_files(self, output_dir: str) -> None: integrate the benchmark code with the target FaaS platform's execution environment. + Files are sourced from `benchmarks/wrappers/{deployment_name}/{language_name}/`. + Args: output_dir: Directory where deployment files should be added """ @@ -693,6 +695,8 @@ def add_deployment_package_python(self, output_dir: str) -> None: Appends platform-specific Python packages and benchmark module dependencies to the requirements.txt file for the deployment. + Handles versioned requirements files (e.g., requirements.txt.3.8). + Args: output_dir: Directory containing the requirements file to modify """ @@ -723,6 +727,7 @@ def add_deployment_package_nodejs(self, output_dir: str) -> None: Modifies the package.json file to include platform-specific Node.js dependencies required for deployment. + Handles versioned package.json files (e.g., package.json.12). 
Args: output_dir: Directory containing the package.json file to modify @@ -789,8 +794,14 @@ def install_dependencies(self, output_dir: str) -> None: Uses Docker containers to install language-specific dependencies (pip packages for Python, npm packages for Node.js) in an environment - matching the target deployment platform. Handles both volume mounting - and file copying approaches for Docker compatibility. + matching the target deployment platform. + Pulls a pre-built Docker image specific to the deployment, language, and + runtime version. Mounts the output directory into the container and runs + an installer script (`/sebs/installer.sh`) within the container. + Handles fallbacks to unversioned Docker images if versioned ones are not found. + + Supports copying files to/from Docker for environments where volume mounting + is problematic (e.g., CircleCI). Args: output_dir: Directory containing the code package to build @@ -976,9 +987,11 @@ def build( """Build the complete benchmark deployment package. Orchestrates the entire build process for a benchmark, including: - - Cache validation and reuse if possible - Code copying and dependency installation - - Platform-specific build steps + - Adding benchmark data and deployment-specific files + - Running platform-specific build and packaging steps + (e.g., zipping, creating container image). + - Cache validation and reuse if possible - Cache updates after successful build Args: @@ -1069,9 +1082,12 @@ def prepare_input( ) -> Dict[str, str]: """Prepare benchmark input data and allocate cloud resources. - Handles the setup of cloud storage buckets and NoSQL databases - required by the benchmark. Generates benchmark-specific input data - and uploads it to the appropriate cloud storage systems. + Locates the benchmark's input generator module (`input.py`), determines + storage requirements (object storage buckets, NoSQL tables), and invokes + the `generate_input` function from the module to create and upload + input data. Handles the setup of cloud storage buckets and NoSQL databases + required by the benchmark. + Updates the cache with storage details after successful preparation. Args: system_resources: Cloud system resources manager @@ -1150,16 +1166,12 @@ def prepare_input( return input_config - """ - This is used in experiments that modify the size of input package. - This step allows to modify code package without going through the entire pipeline. - """ - def code_package_modify(self, filename: str, data: bytes) -> None: """Modify a file within the deployed code package. Updates a specific file within the code package without rebuilding the entire package. Currently only supports ZIP archive packages. + This is used in experiments that modify the size of input package. Args: filename: Name of the file to modify within the package @@ -1208,7 +1220,6 @@ def code_package_recompute_size(self) -> float: self._code_size = bytes_size return bytes_size - # https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python @staticmethod def _update_zip(zipname: str, filename: str, data: bytes) -> None: """Update a file within a ZIP archive. @@ -1216,6 +1227,12 @@ def _update_zip(zipname: str, filename: str, data: bytes) -> None: Replaces the content of a specific file within a ZIP archive while preserving all other files and archive metadata. + Creates a temporary zip file, copies all items from the original except + the target file (if it exists), and adds/replaces the target file with + new data. 
Finally, replaces the original zip with the temporary one. + Based on method from: + https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python + Args: zipname: Path to the ZIP archive to modify filename: Name of the file to update within the archive @@ -1245,16 +1262,11 @@ def _update_zip(zipname: str, filename: str, data: bytes) -> None: zf.writestr(filename, data) -""" - The interface of `input` module of each benchmark. - Useful for static type hinting with mypy. -""" - - class BenchmarkModuleInterface: """Interface definition for benchmark input modules. + Useful for static type hinting with mypy and documentation. - This abstract class defines the interface that benchmark input modules + This class defines the interface that benchmark input modules must implement to provide input data generation, storage allocation, and NoSQL database setup for benchmarks. diff --git a/sebs/cache.py b/sebs/cache.py index d58eac73..b907d440 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -8,6 +8,12 @@ code packages, Docker containers, deployed functions, and cloud resource configurations to optimize repeated benchmark executions and deployments. +This class is essential for efficient benchmarking - we avoid regenerating +cloud resources, and we do not have to keep querying them every time +we start the benchmark. This is particularly important for cloud platforms +like Azure, where queries require a CLI tool running in a container and can +take a long time to resolve. + Example: Basic cache usage: cache = Cache("/path/to/cache", docker_client) @@ -15,7 +21,6 @@ cache.add_code_package("aws", benchmark_instance) """ -# https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth import collections.abc import docker import datetime @@ -35,9 +40,10 @@ def update(d: Dict[str, Any], u: Mapping[str, Any]) -> Dict[str, Any]: """Recursively update nested dictionary with another dictionary. - This function performs deep merge of two dictionaries, updating nested + This function performs a deep merge of two dictionaries, merging nested dictionary values rather than replacing them entirely. + Args: d (Dict[str, Any]): The target dictionary to update. u (Mapping[str, Any]): The source dictionary with updates. @@ -45,6 +51,8 @@ def update(d: Dict[str, Any], u: Mapping[str, Any]) -> Dict[str, Any]: Returns: Dict[str, Any]: The updated dictionary. """ + + # https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth for k, v in u.items(): if isinstance(v, collections.abc.Mapping): d[k] = update(d.get(k, {}), v) @@ -87,7 +95,7 @@ class Cache(LoggingBase): config_updated (bool): Flag indicating if configuration needs to be saved. cache_dir (str): Absolute path to the cache directory. ignore_functions (bool): Flag to skip function caching operations. - ignore_storage (bool): Flag to skip storage resoyrce caching. + ignore_storage (bool): Flag to skip storage resource caching. docker_client (docker.DockerClient): Docker client for container operations. """ @@ -123,7 +131,7 @@ def typename() -> str: Returns: str: The cache type name. """ - return "Benchmark" + return "Cache" def load_config(self) -> None: """Load cached cloud configurations from disk. @@ -307,7 +315,8 @@ def get_nosql_config(self, deployment: str, benchmark: str) -> Optional[Dict[str def _get_resource_config( self, deployment: str, benchmark: str, resource: str ) -> Optional[Dict[str, Any]]: - """Get cached resource configuration for a benchmark.
+ """Helper to retrieve a specific type of resource + configuration from the benchmark's cache. Args: deployment (str): Deployment platform name. @@ -348,11 +357,12 @@ def update_nosql(self, deployment: str, benchmark: str, config: Dict[str, Any]) def _update_resources( self, deployment: str, benchmark: str, resource: str, config: Dict[str, Any] ) -> None: - """Update cached resource configuration for a benchmark. + """Internal helper to update a resource configuration (storage or NoSQL) in the cache. + - This method handles caching of resource configurations (storage, nosql) - for benchmarks. It creates the benchmark directory if it doesn't exist - and updates the configuration file. + Since the benchmark data is prepared before creating and caching a function, + it ensures the benchmark's cache directory exists and updates the `config.json` file + within it. Args: deployment (str): Deployment platform name. @@ -389,8 +399,10 @@ def add_code_package( ) -> None: """Add a new code package to the cache. - Caches a compiled benchmark code package (either directory or ZIP file) - along with its configuration. Handles both package and container deployments. + Copies the code package (directory or zip file) into the cache structure. + Records metadata (hash, size, location, timestamps, image details if container) + in the benchmark's `config.json` within the cache. + Handles both package and container deployments. Args: deployment_name (str): Name of the deployment platform. @@ -509,8 +521,9 @@ def update_code_package( ) -> None: """Update an existing code package in the cache. - Updates cached code package with new content and metadata. If the - cached package doesn't exist, adds it as a new package. + Copies the new code package version over the old one. Updates metadata + (hash, size, modification timestamp, image details if container) in the + benchmark's `config.json`. If the cached package doesn't exist, adds it as a new package. Args: deployment_name (str): Name of the deployment platform. diff --git a/sebs/config.py b/sebs/config.py index d3cd388b..5c91a221 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -69,6 +69,7 @@ def docker_repository(self) -> str: def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: """Get deployment packages for a specific deployment and language. + These are packages added by SeBS to the benchmark's list of dependencies. Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). @@ -84,7 +85,8 @@ def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[ def deployment_module_packages( self, deployment_name: str, language_name: str ) -> Dict[str, str]: - """Get deployment module packages for a specific deployment and language. + """Get deployment module packages for a specific deployment and language, e.g., + packages specific to object or NoSQL storage. Args: deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). @@ -237,11 +239,14 @@ def benchmark_image_tag( language_version: str, architecture: str, ) -> str: - """Generate Docker image tag for a benchmark. + """Generate Docker image tag for a benchmark container. Creates a standardized tag format that includes system, benchmark, language, version, architecture, optional prefix, and SeBS version. + Format: function.{system}.{benchmark}.{language_name}-{language_version}- + {architecture}[-{image_prefix}]-{sebs_version} + Args: system (str): Deployment system name (e.g., 'aws', 'azure'). 
benchmark (str): Benchmark name (e.g., '110.dynamic-html'). diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 2a747c69..6ba4ce4f 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -74,7 +74,10 @@ def update_storage(self) -> bool: return self._update_storage def check_flag(self, key: str) -> bool: - """Check if a flag is set. + """Check if a specific experiment flag is set. + + Currently it is only used to let the benchmark know that Docker + volumes are disabled (e.g., in CircleCI environment). Args: key: Name of the flag to check diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 8271acee..ca25cae3 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -1,7 +1,8 @@ """Environment management for experiment execution. This module provides the ExperimentEnvironment class for managing CPU settings -and system configuration during benchmark experiments. It handles: +and system configuration during benchmark experiments. This is useful for local, +Docker-based executions. It handles: - CPU frequency scaling and governor management - Hyperthreading control (enable/disable) diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 7ca90126..fd64efd0 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -8,6 +8,11 @@ The experiment involves invoking functions at increasing time intervals and observing when cold starts occur, thus inferring the platform's container caching and eviction policies. + +This implementation is slightly different from the original one, +which used the 010.sleep benchmark. Here, we use the 040.server-reply +to double-check that all functions are "alive" at the same time. +However, the sleep logic is not currently implemented in 040.server-reply. """ import logging @@ -52,24 +57,24 @@ class EvictionModel(Experiment): # Time intervals (in seconds) between invocations # Uncomment additional intervals as needed for longer tests times = [ - 1, # 1 second - # 2, # 2 seconds - # 4, # 4 seconds - # 8, # 8 seconds - # 15, # 15 seconds - # 30, # 30 seconds - # 60, # 1 minute - # 120, # 2 minutes - # 180, # 3 minutes - # 240, # 4 minutes - # 300, # 5 minutes - # 360, # 6 minutes - # 480, # 8 minutes - # 600, # 10 minutes - # 720, # 12 minutes - # 900, # 15 minutes - # 1080, # 18 minutes - # 1200, # 20 minutes + 1, + # 2, + # 4, + # 8, + # 15, + # 30, + # 60, + # 120, + # 180, + # 240, + # 300, + # 360, + # 480, + # 600, + # 720, + # 900, + # 1080, + # 1200, ] # TODO: temporal fix # function_copies_per_time = 5 @@ -110,6 +115,8 @@ def accept_replies(port: int, invocations: int) -> None: acceptance to ensure functions receive a response. The method logs all activity to a file. + This is used by the '040.server-reply' benchmark to confirm function execution. + Args: port: TCP port to listen on invocations: Number of expected function invocations @@ -162,6 +169,9 @@ def execute_instance( between them. The first invocation should be a cold start, and the second will indicate whether the container was evicted during the sleep period. + This function is intended to be run in a separate thread; it performs two + synchronous HTTP invocations of the given function. + Args: sleep_time: Time to sleep between invocations (seconds) pid: Process ID for logging @@ -279,9 +289,10 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution.
This method sets up the benchmark, functions, and output directory for - the experiment. It creates a separate function for each time interval - and copy combination, allowing for parallel testing of different - eviction times. + the experiment. Retrieves the '040.server-reply' benchmark, sets up result storage, + and creates a separate function for each time interval and copy combination, + allowing for parallel testing of different eviction times. + Args: sebs_client: The SeBS client to use @@ -310,9 +321,6 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: self.functions = [] for fname in self.functions_names: - # if self._benchmark.functions and fname in self._benchmark.functions: - # self.logging.info(f"Skip {fname}, exists already.") - # continue self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) def run(self) -> None: @@ -420,15 +428,5 @@ def run(self) -> None: # verify_results(results) with open(os.path.join(self._out_dir, fname), "w") as out_f: - # print(results) print(f"Write results to {os.path.join(self._out_dir, fname)}") out_f.write(serialize(results)) - # func = self._deployment_client.get_function( - # self._benchmark, self.functions_names[0] - # ) - # self._deployment_client.enforce_cold_start(func) - # ret = func.triggers[0].async_invoke(payload) - # result = ret.result() - # print(result.stats.cold_start) - # self._result.add_invocation(func, result) - # print(serialize(self._result)) diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 9ded288f..efe502ad 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -11,6 +11,8 @@ The experiment is designed to help identify performance bottlenecks and optimize function deployment and invocation. +We deploy microbenchmark 030.clock-synchronization to exactly measure the +network latency between client and function. """ import csv @@ -79,6 +81,9 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings def before_sample(self, size: int, input_benchmark: dict) -> None: """Prepare the benchmark with a specific code package size. + Creates a file named 'randomdata.bin' with the specified size of random bytes + within the benchmark's code package. Then, updates the function on the deployment. + Args: size: Size of the code package to create input_benchmark: Benchmark input configuration (unused) @@ -118,6 +123,8 @@ def __init__(self, settings: dict) -> None: def before_sample(self, size: int, input_benchmark: dict) -> None: """Prepare the benchmark input with a specific payload size. + Generates different payload sizes by creating base64 encoded byte arrays. + Args: size: Size of the payload to create input_benchmark: Benchmark input configuration to modify @@ -211,7 +218,10 @@ def run(self) -> None: 1. Setting up either code package size or payload size experiments 2. Running warm-up and cold start invocations 3. Measuring invocation overhead for different sizes - 4. Collecting and storing results in CSV format + (either code package or payload, based on settings) + 4. Collecting and storing results in CSV format, + including client-side and server-side timestamps + """ from requests import get @@ -290,8 +300,9 @@ def process( """Process experiment results and generate summary statistics. This method processes the raw experiment results by: - 1. Loading timing data from CSV files - 2. Computing clock drift and round-trip time + 1. 
Loading client-side timing data from CSV files + and server-side UDP datagram timestamps + 2. Computing clock drift and Round-Trip Time (RTT) 3. Creating a processed results file with invocation times Args: @@ -367,9 +378,12 @@ def receive_datagrams( """Receive UDP datagrams from the function for clock synchronization. This method implements a UDP server that communicates with the function - to measure clock synchronization and network timing. It receives - datagrams from the function and responds to them, measuring timing - information. + to measure clock synchronization and network timing. + It opens a UDP socket, triggers an asynchronous function invocation, and then + listens for a specified number of datagrams, recording timestamps for + received and sent datagrams. + + Saves server-side timestamps to a CSV file named `server-{request_id}.csv`. Args: input_benchmark: Benchmark input configuration diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index 2f3f5672..640ab7bd 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -2,13 +2,8 @@ This module provides the NetworkPingPong experiment implementation, which measures network latency and throughput characteristics between client and -serverless functions, as well as between serverless functions and storage -services. It can determine: - -- Client-to-function latency -- Function-to-storage latency -- Network throughput for different payload sizes -- Variation in network performance over time +serverless functions. It determines various latency characteristics of the network +connection in the cloud. """ import csv @@ -35,10 +30,12 @@ class NetworkPingPong(Experiment): """Network latency and throughput measurement experiment. - This experiment measures the network performance characteristics - between the client, serverless functions, and storage services. - It can measure ping-pong latency and throughput with different - payload sizes and concurrency levels. + This experiment measures the network RTT (Round-Trip Time) using a ping-pong mechanism. + Deploys the '020.network-benchmark' which echoes back UDP datagrams. + The experiment sends a series of datagrams and measures the time taken + for each to return. This experiment measures the network performance characteristics + between the client and serverless functions. + Attributes: benchmark_input: Input configuration for the benchmark @@ -61,8 +58,8 @@ def __init__(self, config: ExperimentConfig): def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: """Prepare the experiment for execution. - This method sets up the benchmark, function, triggers, storage, and output - directory for the experiment. It creates or gets the function and + This method sets up the '020.network-benchmark' benchmark, triggers, storage, + and output directory for the experiment. It creates or gets the function and its HTTP trigger, and prepares the input data for the benchmark. Args: diff --git a/sebs/experiments/startup_time.py b/sebs/experiments/startup_time.py deleted file mode 100644 index dd7ccef5..00000000 --- a/sebs/experiments/startup_time.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Startup time measurement experiment implementation. - -This module provides the StartupTime experiment implementation, which measures -the startup and initialization time of serverless functions. 
This experiment -focuses on measuring: - -- Cold start initialization time -- Container startup overhead -- Runtime initialization time -- Language-specific startup costs - -The experiment is designed to isolate and measure the time it takes for -a serverless platform to initialize a new container and runtime environment. -""" - -from typing import TYPE_CHECKING - -from sebs.experiments.experiment import Experiment -from sebs.experiments.config import Config as ExperimentConfig - -if TYPE_CHECKING: - from sebs import SeBS - from sebs.faas.system import System as FaaSSystem - - -class StartupTime(Experiment): - """Startup time measurement experiment. - - This experiment measures the startup and initialization time of serverless - functions, focusing on cold start performance. It isolates the time spent - in container initialization, runtime startup, and function loading. - - The experiment can be used to compare startup times across different: - - Programming languages and runtimes - - Memory configurations - - Code package sizes - - Platform configurations - - Attributes: - config: Experiment configuration settings - """ - - def __init__(self, config: ExperimentConfig) -> None: - """Initialize a new StartupTime experiment. - - Args: - config: Experiment configuration - """ - super().__init__(config) - - @staticmethod - def name() -> str: - """Get the name of the experiment. - - Returns: - The name "startup-time" - """ - return "startup-time" - - @staticmethod - def typename() -> str: - """Get the type name of the experiment. - - Returns: - The type name "Experiment.StartupTime" - """ - return "Experiment.StartupTime" - - def prepare(self, sebs_client: "SeBS", deployment_client: "FaaSSystem") -> None: - """Prepare the experiment for execution. - - This method sets up the experiment by preparing the benchmark function - and configuring the necessary resources for measuring startup time. - - Args: - sebs_client: The SeBS client to use - deployment_client: The deployment client to use - - Note: - This experiment is currently a placeholder and needs implementation. - """ - # TODO: Implement startup time experiment preparation - pass - - def run(self) -> None: - """Execute the startup time experiment. - - This method runs the experiment to measure function startup times, - enforcing cold starts and measuring initialization overhead. - - Note: - This experiment is currently a placeholder and needs implementation. - """ - # TODO: Implement startup time experiment execution - pass diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 9da60d86..1187137f 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -20,10 +20,9 @@ Config: Abstract base for complete platform configuration The credentials initialization follows this precedence order: -1. Load credentials from cache -2. Override with any new values provided in config -3. Fall back to environment variables -4. Report failure if no credentials are available +1. Load credentials with values provided in config +2. Fall back to environment variables +3. Report failure if no credentials are available """ from __future__ import annotations @@ -45,12 +44,6 @@ class Credentials(ABC, LoggingBase): specific credential types (API keys, service account files, connection strings, etc.) while following the common serialization and caching patterns defined here. 
- - Platform implementations must handle: - - Loading credentials from user configuration - - Fallback to environment variables - - Secure storage in cache - - Credential validation and refresh """ def __init__(self): @@ -64,9 +57,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Crede This method implements the credential loading hierarchy: 1. Use new config values if provided - 2. Fall back to cached credentials - 3. Load from environment variables - 4. Fail if no credentials available + 2. Load from environment variables + 3. Fail if no credentials available + + Credentials are NOT cached. Args: config: User-provided configuration dictionary @@ -278,6 +272,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou def serialize(self) -> dict: """Serialize resources to dictionary for cache storage. + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `resources_id` and `storage_buckets`. + Returns: dict: Serialized resource data including resource ID and bucket mappings """ @@ -382,7 +379,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi This method serves as a factory for platform-specific configurations, dynamically loading the appropriate implementation based on the platform - name specified in the configuration. + name specified in the configuration. To do that, it calls + the appropriate subclass's deserialize method. Args: config: User-provided configuration dictionary @@ -423,6 +421,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi def serialize(self) -> dict: """Serialize configuration to dictionary for cache storage. + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `name` and `region`. + Returns: dict: Serialized configuration including platform name and region """ diff --git a/sebs/faas/container.py b/sebs/faas/container.py index 3894f2bc..ee881a52 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -81,7 +81,7 @@ def disable_rich_output(self, val: bool): def __init__( self, system_config: SeBSConfig, - docker_client, + docker_client: docker.client, experimental_manifest: bool = False, ): """Initialize the Docker container manager. @@ -246,15 +246,28 @@ def build_base_image( benchmark: str, is_cached: bool, ) -> Tuple[bool, str]: + """ - When building function for the first time (according to SeBS cache), - check if Docker image is available in the registry. - If yes, then skip building. - If no, then continue building. - - For every subsequent build, we rebuild image and push it to the - registry. These are triggered by users modifying code and enforcing - a build. + Build benchmark Docker image. + When building function for the first time (according to SeBS cache), + check if Docker image is available in the registry. + If yes, then skip building. + If no, then continue building. + + For every subsequent build, we rebuild image and push it to the + registry. These are triggered by users modifying code and enforcing + a build. 
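+
+        A minimal, illustrative sketch of how this method might be called (the
+        ``container_client`` object and the argument values are hypothetical,
+        shown only to clarify the expected inputs and the returned tuple)::
+
+            rebuilt, image_uri = container_client.build_base_image(
+                directory="/tmp/code_package",
+                language_name="python",
+                language_version="3.8",
+                architecture="x64",
+                benchmark="110.dynamic-html",
+                is_cached=False,
+            )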
+ + Args: + directory: build directory + language_name: benchmark language + language_version: benchmark language version + architecture: CPU architecture + benchmark: benchmark name + is_cached: true if it the image is currently cached + + Returns: + Tuple[bool, str]: True if image was rebuilt, and image URI """ registry_name, repository_name, image_tag, image_uri = self.registry_name( From 963f5779d37b299cc42435895b91633955face0a Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 28 Jun 2025 16:38:25 +0200 Subject: [PATCH 21/21] [docs] Finishing reviewing Claude-generated docstrings --- sebs/aws/s3.py | 4 +- sebs/azure/config.py | 22 +-- sebs/azure/cosmosdb.py | 10 +- sebs/azure/function.py | 32 +---- sebs/azure/system_resources.py | 7 +- sebs/azure/triggers.py | 8 +- sebs/faas/function.py | 14 +- sebs/faas/nosql.py | 78 ++++++----- sebs/faas/resources.py | 11 +- sebs/faas/storage.py | 239 +++++++++++++++++++++++---------- sebs/faas/system.py | 45 ++++--- sebs/gcp/__init__.py | 6 +- sebs/gcp/cli.py | 29 ++-- sebs/gcp/config.py | 39 +++--- sebs/gcp/datastore.py | 12 +- sebs/gcp/function.py | 1 + sebs/gcp/gcp.py | 22 +-- sebs/gcp/resources.py | 5 +- sebs/gcp/storage.py | 16 ++- sebs/gcp/triggers.py | 4 + sebs/local/__init__.py | 9 +- sebs/local/config.py | 12 +- sebs/local/deployment.py | 15 +-- sebs/local/function.py | 9 +- sebs/local/local.py | 15 +-- sebs/local/measureMem.py | 6 +- sebs/openwhisk/__init__.py | 8 +- sebs/openwhisk/config.py | 16 +-- sebs/openwhisk/container.py | 18 +-- sebs/openwhisk/function.py | 24 ++-- sebs/openwhisk/openwhisk.py | 15 ++- sebs/openwhisk/triggers.py | 3 +- sebs/regression.py | 48 ++----- sebs/sebs.py | 12 +- sebs/statistics.py | 15 +-- sebs/storage/__init__.py | 9 +- sebs/storage/config.py | 18 +-- sebs/storage/minio.py | 34 ++--- sebs/storage/resources.py | 32 +---- sebs/storage/scylladb.py | 24 ++-- sebs/types.py | 4 +- sebs/utils.py | 1 + 42 files changed, 462 insertions(+), 489 deletions(-) diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 3055aacc..0ba90385 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -149,9 +149,11 @@ def _create_bucket( # this is incredible # https://github.com/boto/boto3/issues/125 if self.region != "us-east-1": + from typing import cast, Any + self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={"LocationConstraint": self.region}, + CreateBucketConfiguration={"LocationConstraint": cast(Any, self.region)}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 08dcb0aa..5e20ea75 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -3,25 +3,12 @@ This module provides configuration classes for Azure resources, credentials, and deployment settings. It handles Azure-specific configuration including service principal authentication, resource group management, storage accounts, -and CosmosDB setup for the SeBS benchmarking suite. +and CosmosDB setup. 
Key classes: AzureCredentials: Manages Azure service principal authentication AzureResources: Manages Azure resource allocation and lifecycle AzureConfig: Combines credentials and resources for Azure deployment - -Example: - Basic usage for setting up Azure configuration: - - :: - - from sebs.azure.config import AzureConfig, AzureCredentials, AzureResources - from sebs.cache import Cache - - # Load configuration from config dict and cache - config = AzureConfig.deserialize(config_dict, cache, handlers) - credentials = config.credentials - resources = config.resources """ import json @@ -208,6 +195,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden def serialize(self) -> dict: """Serialize credentials to dictionary. + We store only the subscription ID to avoid insecure storage of sensitive data. + Returns: Dictionary containing serialized credential data. """ @@ -227,8 +216,7 @@ class AzureResources(Resources): """Azure resource management for SeBS benchmarking. This class manages Azure cloud resources including storage accounts, - resource groups, and CosmosDB accounts required for serverless function - benchmarking. It handles resource allocation, caching, and lifecycle management. + resource groups, and CosmosDB accounts. Attributes: _resource_group: Name of the Azure resource group @@ -607,6 +595,7 @@ def _create_storage_account( """Internal method to create storage account. Creates a new Azure storage account with the specified name. + This one can be used both for data storage and function storage. This method does NOT update cache or add to resource collections. Args: @@ -695,8 +684,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour """Deserialize resources from config and cache. Loads Azure resources from cache if available, otherwise from configuration. + If no data is present, then we initialize an empty resources object. Args: config: Configuration dictionary diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 71835b05..088ea2c3 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -86,7 +86,7 @@ class CosmosDB(NoSQLStorage): resource lifecycle. It supports benchmark-specific database allocation and container creation with proper caching and error handling. - Azure CosmosDB uses a different model than traditional NoSQL databases: + Azure CosmosDB has the following model: - Each benchmark gets its own database - Container names match benchmark table names directly - No table mappings are required @@ -341,13 +341,11 @@ def create_table( if benchmark_resources is not None and name in benchmark_resources.containers: self.logging.info(f"Using cached CosmosDB container {name}") - # For some reason, creating the client is enough to verify existence of db/container. - # We need to force the client to make some actions; that's why we call read. - - # Each benchmark receives its own CosmosDB database if benchmark_resources is None: - # Get or allocate database + # For some reason, creating the client is not enough to verify existence + # of db/container. + # We need to force the client to make some actions; that's why we call read. try: db_client = self.cosmos_client().get_database_client(benchmark) db_client.read() diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 72006341..a95aff04 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,29 +1,7 @@ """Azure Function implementation for SeBS benchmarking.
-This module provides the Azure-specific implementation of serverless functions -for the SeBS benchmarking suite. It handles Azure Function representation, -serialization, and deserialization with Azure-specific storage configuration. - -The AzureFunction class extends the base Function class to include Azure-specific -attributes like function storage accounts and Azure trigger configurations. - -Example: - Basic usage for creating an Azure Function: - - :: - - from sebs.azure.function import AzureFunction - from sebs.azure.config import AzureResources - from sebs.faas.function import FunctionConfig - - # Create function with Azure-specific storage - function = AzureFunction( - name="my-function", - benchmark="test-benchmark", - code_hash="abc123", - function_storage=storage_account, - cfg=function_config - ) +The AzureFunction class extends the base Function class and adds +one Azure-specific property: storage account associated with this function. """ from sebs.azure.config import AzureResources @@ -31,11 +9,7 @@ class AzureFunction(Function): - """Azure Function implementation for SeBS benchmarking. - - This class represents an Azure Function with Azure-specific attributes - and configuration. It includes storage account information and supports - Azure-specific triggers like HTTP triggers. + """Azure Function implementation. Attributes: function_storage: Azure Storage account used for function code storage diff --git a/sebs/azure/system_resources.py b/sebs/azure/system_resources.py index 960d5185..a009074c 100644 --- a/sebs/azure/system_resources.py +++ b/sebs/azure/system_resources.py @@ -83,10 +83,6 @@ def __init__( def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: """Get or create Azure Blob Storage instance. - Creates wrapper object for Azure blob storage with proper authentication. - First ensures that storage account is created and connection string - is known, then creates wrapper and creates requested number of buckets. - Requires Azure CLI instance in Docker to obtain storage account details. Args: @@ -115,6 +111,8 @@ def get_nosql_storage(self) -> CosmosDB: Creates and configures CosmosDB instance for NoSQL benchmark operations. Handles authentication and database/container creation as needed. + Requires Azure CLI instance in Docker. + Returns: CosmosDB: Azure CosmosDB instance for NoSQL operations. """ @@ -190,6 +188,7 @@ def shutdown(self) -> None: Cleans up Azure CLI Docker container and other resources. Only shuts down CLI if it was created by this instance. + Does not terminate CLI instance attached to the class. """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index d903fa09..3e3aa586 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,11 +1,7 @@ """Azure Function triggers for SeBS benchmarking. This module provides Azure-specific trigger implementations for invoking -serverless functions during benchmarking. It supports HTTP triggers and -integrates with Azure storage for data handling. - -The trigger classes handle function invocation, result processing, and -integration with Azure-specific services like Blob Storage. +serverless functions. Example: Basic usage for HTTP trigger: @@ -39,6 +35,8 @@ class AzureTrigger(Trigger): triggers, including data storage account management for benchmark data handling. + FIXME: do we still need to know the data storage account? 
+ Attributes: _data_storage_account: Azure storage account for benchmark data """ diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 5cb53330..7fd4e314 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -33,7 +33,7 @@ class ExecutionTimes: Stores various timing measurements from the client's perspective, including total execution time, HTTP connection times, and benchmark - runtime. + runtime. All times are reported in microseconds unless otherwise specified. Attributes: client: Total client-side execution time in microseconds @@ -353,6 +353,8 @@ class Trigger(ABC, LoggingBase): A trigger represents a mechanism for invoking a serverless function, such as HTTP requests, direct SDK invocations, or event-based triggers. Each trigger type implements synchronous and asynchronous invocation methods. + + Includes a helper method for HTTP invocations using pycurl. """ class TriggerType(Enum): @@ -392,7 +394,7 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec """ Invoke a function via HTTP request. - Makes a HTTP POST request to the given URL with the provided payload + Makes a HTTP POST request using pycurl to the given URL, with the provided payload, and processes the response into an ExecutionResult. Args: @@ -712,12 +714,14 @@ class Function(LoggingBase): Abstract base class for serverless functions. This class represents a deployed serverless function with its configuration - and associated triggers. It provides a unified interface for managing function - deployments across different cloud providers. - + and contains a list of associated triggers. Each cloud provider (AWS, Azure, GCP, etc.) implements a subclass with platform-specific functionality. + Represents a deployable unit of code on a FaaS platform. Contains details + about the benchmark it belongs to, its name, code hash, configuration, + and associated triggers. Subclasses implement provider-specific details. + Attributes: config: Function configuration name: Name of the deployed function diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 835d7063..43945f54 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -79,7 +79,7 @@ def __init__(self, region: str, cache_client: Cache, resources: Resources): @abstractmethod def get_tables(self, benchmark: str) -> Dict[str, str]: """ - Get all tables associated with a benchmark. + Get a mapping of benchmark-defined table names to actual cloud provider table names. Args: benchmark: Name of the benchmark @@ -107,6 +107,7 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: def retrieve_cache(self, benchmark: str) -> bool: """ Retrieve cached table information for a benchmark. + Implementations should populate internal structures with cached table names/details. Args: benchmark: Name of the benchmark @@ -128,35 +129,36 @@ def update_cache(self, benchmark: str): def envs(self) -> dict: """ - Get environment variables required for connecting to the NoSQL storage. + Return a dictionary of environment variables that are required by functions + to access this NoSQL storage (e.g., connection strings, table names). + Default implementation returns an empty dictionary. Subclasses should override + if they need to expose environment variables. Returns: dict: Dictionary of environment variables """ return {} - """ - Table naming convention and implementation requirements. 
- - Each table name follows this pattern: - sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} - - Each implementation should do the following: - 1. Retrieve cached data - 2. Create missing tables that do not exist - 3. Update cached data if anything new was created (done separately - in benchmark.py once the data is uploaded by the benchmark) - """ - def create_benchmark_tables( - self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + self, + benchmark: str, + name: str, + primary_key: str, + secondary_key: Optional[str] = None, ): """ - Create a table for a benchmark if it doesn't exist in the cache. - Checks if the table already exists in the cache. If not, creates a new table with the specified keys. + Each table name follows this pattern: + sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} + + Each implementation should do the following: + 1. Retrieve cached data + 2. Create missing tables that do not exist + 3. Update cached data if anything new was created (done separately + in benchmark.py once the data is uploaded by the benchmark) + Args: benchmark: Name of the benchmark name: Logical name of the table @@ -174,21 +176,22 @@ def create_benchmark_tables( self.logging.info(f"Preparing to create a NoSQL table {name} for benchmark {benchmark}") self.create_table(benchmark, name, primary_key, secondary_key) - """ - Platform-specific table implementations: - - - AWS: DynamoDB Table - - Azure: CosmosDB Container - - Google Cloud: Firestore in Datastore Mode, Database - """ - @abstractmethod def create_table( - self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + self, + benchmark: str, + name: str, + primary_key: str, + secondary_key: Optional[str] = None, ) -> str: """ Create a new table for a benchmark. + Provider-specific implementation details: + - AWS: DynamoDB Table + - Azure: CosmosDB Container + - Google Cloud: Firestore in Datastore Mode, Database/Collection + Args: benchmark: Name of the benchmark name: Logical name of the table @@ -210,9 +213,12 @@ def write_to_table( secondary_key: Optional[Tuple[str, str]] = None, ): """ - Write data to a table. + Write an item/document to the specified table/container. + This is used by benchmarks to populate tables with test data. Args: + Write data to a table. + benchmark: Name of the benchmark table: Logical name of the table data: Dictionary of data to write @@ -221,18 +227,16 @@ def write_to_table( """ pass - """ - Table management operations: - - - AWS DynamoDB: Removing & recreating table is the cheapest & fastest option - - Azure CosmosDB: Recreate container - - Google Cloud: Also likely recreate - """ - @abstractmethod def clear_table(self, name: str) -> str: """ - Clear all data from a table. + Clear all items from a table/container. + Currently not implemented for any of hte proivders. + + Provider-specific implementation details: + - AWS DynamoDB: Removing & recreating table looks like the cheapest & fastest option. + - Azure CosmosDB: Recreate container or use specific API to delete items. + - Google Cloud: Likely recreate collection or use specific API. Args: name: Name of the table to clear diff --git a/sebs/faas/resources.py b/sebs/faas/resources.py index ee590881..21f5c877 100644 --- a/sebs/faas/resources.py +++ b/sebs/faas/resources.py @@ -4,12 +4,6 @@ across different serverless platforms. It coordinates access to storage services, NoSQL databases, and other cloud resources needed for benchmark execution. 
-The SystemResources class serves as the resource factory and manager, handling: -- Storage service provisioning and access -- NoSQL database provisioning and access -- Resource lifecycle management -- Platform-specific resource configuration - Each platform implementation (AWS, Azure, GCP, Local, etc.) provides concrete implementations that handle platform-specific resource management while following the common interface defined here. @@ -35,10 +29,6 @@ class SystemResources(ABC, LoggingBase): provisioning and access to storage services, NoSQL databases, and other platform-specific resources. - The class serves as a factory and coordinator for different types of storage - and database services, ensuring they are properly configured and accessible - to benchmark functions during execution. - Attributes: _config: Platform configuration containing credentials and settings _cache_client: Cache client for storing resource configurations @@ -86,6 +76,7 @@ def get_nosql_storage(self) -> NoSQLStorage: Provides access to NoSQL database services (DynamoDB, CosmosDB, Datastore, ScyllaDB) for benchmarks that require structured data storage with key-value or document-based operations. + The storage instance may be a cloud service or a locally deployed container. Returns: NoSQLStorage: Configured NoSQL storage instance ready for use diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 01c0a978..b698f585 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -8,7 +8,7 @@ - Cross-platform object storage (S3, Azure Blob, GCS, MinIO) - Benchmark data organization with input/output separation - Bucket lifecycle management and naming conventions -- File upload/download operations with caching +- Benchmark files upload/download operations with caching - Deployment discovery and resource management Each platform provides concrete implementations that handle platform-specific @@ -34,13 +34,6 @@ class PersistentStorage(ABC, LoggingBase): cloud platforms. It manages buckets, files, and benchmark data organization while providing a consistent API regardless of the underlying storage service. - Key responsibilities: - - Bucket lifecycle management (create, list, delete) - - File operations (upload, download, list) - - Benchmark data organization with input/output separation - - Storage configuration caching and management - - Cross-platform deployment discovery - Attributes: cached: Whether bucket configuration is cached _cache_client: Cache client for storing configuration @@ -55,7 +48,7 @@ class PersistentStorage(ABC, LoggingBase): @staticmethod @abstractmethod def deployment_name() -> str: - """Get the deployment platform name. + """Return the name of the FaaS deployment this storage belongs to (e.g., "aws", "azure"). Returns: str: Platform name (e.g., 'aws', 'azure', 'gcp', 'minio') @@ -72,8 +65,8 @@ def cache_client(self) -> Cache: return self._cache_client @property - def replace_existing(self): - """Get whether to replace existing files during operations. + def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files during operations. Returns: bool: True if existing files should be replaced, False otherwise @@ -82,7 +75,7 @@ def replace_existing(self): @replace_existing.setter def replace_existing(self, val: bool): - """Set whether to replace existing files during operations. + """Set flag indicating whether to replace existing files during operations. 
Args: val: True to replace existing files, False to skip @@ -90,7 +83,7 @@ def replace_existing(self, val: bool): self._replace_existing = val @property - def region(self): + def region(self) -> str: """Get the cloud region for storage operations. Returns: @@ -122,6 +115,7 @@ def __init__( @property def input_prefixes(self) -> List[str]: """Get the list of input data prefixes for benchmarks. + These are paths within the benchmark data bucket. Returns: List[str]: List of input prefix names @@ -131,6 +125,7 @@ def input_prefixes(self) -> List[str]: @property def output_prefixes(self) -> List[str]: """Get the list of output data prefixes for benchmarks. + These are paths within the benchmark data bucket. Returns: List[str]: List of output prefix names @@ -159,6 +154,8 @@ def find_deployments(self) -> List[str]: from bucket names that follow the SeBS naming convention. This helps identify existing deployments that can be reused. + Looks for buckets named "sebs-benchmarks-*". + Returns: List[str]: List of deployment resource IDs found in bucket names """ @@ -176,84 +173,153 @@ def find_deployments(self) -> List[str]: def _create_bucket( self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: - pass + """Create a new storage bucket with platform-specific implementation. - """ - Download a file from a bucket. + Args: + name: Desired bucket name + buckets: Optional list of existing buckets to check against + randomize_name: Whether to add random suffix for uniqueness - :param bucket_name: - :param key: storage source filepath - :param filepath: local destination filepath - """ + Returns: + str: Name of the created bucket + + Raises: + Platform-specific exceptions for bucket creation failures + """ + pass @abstractmethod def download(self, bucket_name: str, key: str, filepath: str) -> None: - pass + """Download a file from a storage bucket. - """ - Upload a file to a bucket with by passing caching. - Useful for uploading code package to storage (when required). + Args: + bucket_name: Name of the source bucket + key: Storage source filepath (object key) + filepath: Local destination filepath - :param bucket_name: - :param filepath: local source filepath - :param key: storage destination filepath - """ + Raises: + Platform-specific exceptions for download failures + """ + pass @abstractmethod - def upload(self, bucket_name: str, filepath: str, key: str): - pass + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to a storage bucket. - """ - Retrieves list of files in a bucket. + Bypasses caching and directly uploads the file. Useful for uploading + code packages to storage when required by the deployment platform. - :param bucket_name: - :return: list of files in a given bucket - """ + Args: + bucket_name: Name of the destination bucket + filepath: Local source filepath + key: Storage destination filepath (object key) + + Raises: + Platform-specific exceptions for upload failures + """ + pass @abstractmethod def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """Retrieve list of files in a storage bucket. 
+ + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of file keys in the bucket matching the prefix + + Raises: + Platform-specific exceptions for listing failures + """ pass @abstractmethod def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List all storage buckets/containers, optionally filtering + them with a prefix. + + Args: + bucket_name: Optional specific bucket prefix name to check for + + Returns: + List[str]: List of bucket names. If bucket_name is provided, + returns [bucket_name] if it exists, empty list otherwise. + + Raises: + Platform-specific exceptions for listing failures + """ pass @abstractmethod def exists_bucket(self, bucket_name: str) -> bool: + """Check if a storage bucket/container exists. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if bucket exists, False otherwise + + Raises: + Platform-specific exceptions for access failures + """ pass @abstractmethod def clean_bucket(self, bucket_name: str) -> None: + """Remove all objects from a storage bucket. + + Args: + bucket_name: Name of the bucket to clean + + Raises: + Platform-specific exceptions for deletion failures + """ pass @abstractmethod - def remove_bucket(self, bucket: str): - pass + def remove_bucket(self, bucket: str) -> None: + """Delete a storage bucket completely. + The bucket must often be emptied afterwards. - """ - Allocate a set of input/output buckets for the benchmark. - The routine checks the cache first to verify that buckets have not - been allocated first. + Args: + bucket: Name of the bucket to remove - :param benchmark: benchmark name - :param buckets: number of input and number of output buckets - """ + Raises: + Platform-specific exceptions for deletion failures + """ + pass def benchmark_data( self, benchmark: str, requested_buckets: Tuple[int, int] ) -> Tuple[List[str], List[str]]: + """Allocate storage prefixes for benchmark input and output data. + Creates logical prefixes within the benchmarks bucket for organizing + benchmark input and output data. Checks cache first to avoid redundant + allocation and validates existing prefix configuration. + + Prefix naming format: + - Input: "benchmark-{idx}-input" + - Output: "benchmark-{idx}-output" + + Args: + benchmark: Name of the benchmark + requested_buckets: Tuple of (input_prefix_count, output_prefix_count) + + Returns: + Tuple[List[str], List[str]]: Lists of (input_prefixes, output_prefixes) """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-input - """ + + # Add input prefixes inside benchmarks bucket + # Prefix format: name-idx-input for i in range(0, requested_buckets[0]): self.input_prefixes.append("{}-{}-input".format(benchmark, i)) - """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-output - """ + # Add output prefixes inside benchmarks bucket + # Prefix format: name-idx-output for i in range(0, requested_buckets[1]): self.output_prefixes.append("{}-{}-output".format(benchmark, i)) @@ -304,6 +370,23 @@ def benchmark_data( return self.input_prefixes, self.output_prefixes def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: + """Get or create a storage bucket for the specified type. + + + Checks if the bucket is already known in `_cloud_resources`. 
If not, + generates a bucket name following the standard naming convention, + checks if it exists in the cloud, creates it + if necessary, and then stores it in `_cloud_resources`. + + Args: + bucket_type: Type of bucket to retrieve (BENCHMARKS, EXPERIMENTS, DEPLOYMENT) + + Returns: + str: Name of the bucket for the specified type + + Raises: + Platform-specific exceptions for bucket operations + """ bucket = self._cloud_resources.get_storage_bucket(bucket_type) if bucket is None: @@ -328,30 +411,46 @@ def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: return bucket - """ - Implements a handy routine for uploading input data by benchmarks. - It should skip uploading existing files unless storage client has been - initialized to override existing data. - - :param bucket_idx: index of input bucket - :param file: name of file to upload - :param filepath: filepath in the storage - """ - @abstractmethod def uploader_func(self, bucket_idx: int, file: str, filepath: str) -> None: + """Upload benchmark input data to storage with smart caching. + + Implements a utility function for uploading benchmark input data that + respects caching preferences. Skips uploading existing files unless + the storage client has been configured to override existing data. + + This is used by each benchmark to prepare input benchmark files. + + Args: + bucket_idx: Index of the input prefix/bucket + file: Name of the file to upload + filepath: Storage destination filepath (object key) + + Raises: + Platform-specific exceptions for upload failures + """ pass - """ - Download all files in a storage bucket. - Warning: assumes flat directory in a bucket! Does not handle bucket files - with directory marks in a name, e.g. 'dir1/dir2/file' - """ + def download_bucket(self, bucket_name: str, output_dir: str) -> None: + """Download all files from a storage bucket to a local directory. - def download_bucket(self, bucket_name: str, output_dir: str): + Downloads every file from the specified bucket to a local output directory. + Only downloads files that don't already exist locally. + + Warning: + Assumes flat directory structure in bucket. Does not handle object + keys with directory separators (e.g., 'dir1/dir2/file'). + + Args: + bucket_name: Name of the bucket to download from + output_dir: Local directory to download files to + + Raises: + Platform-specific exceptions for download failures + """ files = self.list_bucket(bucket_name) - for f in files: - output_file = os.path.join(output_dir, f) + for file_key in files: + output_file = os.path.join(output_dir, file_key) if not os.path.exists(output_file): - self.download(bucket_name, f, output_file) + self.download(bucket_name, file_key, output_file) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 8db54209..53dcbe25 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -111,8 +111,8 @@ def cold_start_counter(self) -> int: """ Get the cold start counter. - This counter is used in function name generation to help force cold starts - by creating new function instances with different names. + A counter used in attempts to enforce cold starts. + Its value might be incorporated into function environment variables. Returns: int: The current cold start counter value @@ -180,8 +180,9 @@ def initialize_resources(self, select_prefix: Optional[str]): This method either: 1. Uses an existing resource ID from configuration - 2. Finds and reuses an existing deployment matching the prefix - 3. 
Creates a new unique resource ID and initializes resources
+        2. Finds an existing deployment in the cloud that matches the optional prefix and reuses it
+        3. If no suitable existing deployment is found or specified,
+           a new unique resource ID is generated.

         Args:
             select_prefix: Optional prefix to match when looking for existing deployments
@@ -234,6 +235,7 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str]
         After this call completes, the local or remote FaaS system should be ready to
         allocate functions, manage storage resources, and invoke functions.
+        Subclasses should override this to perform provider-specific initialization.

         Args:
             config: System-specific parameters
@@ -257,9 +259,9 @@ def package_code(
         The benchmark creates a code directory with the following structure:
         - [benchmark sources]
-        - [benchmark resources]
+        - [benchmark resources], e.g., HTML template or ffmpeg binary
         - [dependence specification], e.g. requirements.txt or package.json
-        - [handlers implementation for the language and deployment]
+        - [language-specific wrapper implementation for the target FaaS system]

         This step transforms that structure to fit platform-specific deployment
         requirements, such as creating a zip file for AWS or container image.
@@ -316,7 +318,8 @@ def cached_function(self, function: Function):
         This method is called when a function is found in the cache. It may perform
         platform-specific operations such as checking if the function still exists
-        in the cloud, updating permissions, etc.
+        in the cloud, updating permissions, re-initializing transient client objects,
+        or ensuring associated resources (like triggers) are correctly configured.

         Args:
             function: The cached function instance
@@ -332,7 +335,7 @@ def update_function(
         container_uri: str,
     ):
         """
-        Update an existing function in the FaaS platform.
+        Update an existing function in the FaaS platform with new code and/or configuration.

         Args:
             function: Existing function instance to update
@@ -352,11 +355,14 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)
         This method handles the complete function creation/update workflow:
         1. If a cached function with the given name exists and code has not changed,
-           returns the existing function
-        2. If a cached function exists but the code has changed, updates the
-           function with the new code
+           returns the cached function (after potential configuration checks/updates)
+        2. If a cached function exists but the code hash differs or a rebuild is forced,
+           updates the function code in the cloud
         3. If no cached function exists, creates a new function

+        Benchmark code is built (via `code_package.build`) before these steps.
+        The build might be skipped if the source code has not changed and no update is forced.
+
         Args:
             code_package: The benchmark containing the function code
             func_name: Optional name for the function (will be generated if not provided)
@@ -471,10 +477,10 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)
     @abstractmethod
     def update_function_configuration(self, cached_function: Function, benchmark: Benchmark):
         """
-        Update the configuration of an existing function.
+        Update the configuration of an existing function on the FaaS platform.

         This method is called when a function's code is up-to-date but its
-        configuration (memory, timeout, etc.) needs to be updated.
+        configuration (memory, timeout, environment variables, etc.) needs to be updated.
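To make the three-way decision described in the `get_function` docstring above concrete, here is a minimal, self-contained sketch. `ToyFunction`, the module-level `_cache` dictionary, and the `force_update` flag are hypothetical stand-ins for SeBS's `Benchmark`, `Function`, and cache machinery; this illustrates the control flow only, not the actual implementation.

```python
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class ToyFunction:
    """Hypothetical stand-in for a deployed function tracked in the cache."""
    name: str
    code_hash: str


_cache: Dict[str, ToyFunction] = {}


def get_function_sketch(name: str, code_hash: str, force_update: bool = False) -> ToyFunction:
    cached: Optional[ToyFunction] = _cache.get(name)
    if cached is None:
        # Case 3: nothing cached - create a new function deployment.
        cached = ToyFunction(name, code_hash)
        _cache[name] = cached
    elif cached.code_hash != code_hash or force_update:
        # Case 2: code hash differs or a rebuild is forced - push the new code.
        cached.code_hash = code_hash
    else:
        # Case 1: cache hit with a matching hash - reuse after lightweight checks.
        pass
    return cached
```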
Args: cached_function: The function to update @@ -549,6 +555,7 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) This method implements platform-specific techniques to ensure that subsequent invocations of the functions will be cold starts. + In practice, this usually uses an update of environment variables with new values. Args: functions: List of functions to enforce cold starts for @@ -566,7 +573,11 @@ def download_metrics( metrics: dict, ): """ - Download function metrics from the cloud platform. + Download provider-specific performance metrics from the cloud platform. + + This typically involves querying a logging or monitoring service (e.g., CloudWatch, + Application Insights) for details like actual execution duration, memory usage, etc., + and populating the `requests` (ExecutionResult objects) and `metrics` dictionaries. Args: function_name: Name of the function to get metrics for @@ -593,7 +604,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) def disable_rich_output(self): """ - Disable rich output for platforms that support it. + Disable rich output for platforms that support it, e.g, progress of pushing Docker images. This is mostly used in testing environments or CI pipelines. """ @@ -604,7 +615,9 @@ def shutdown(self) -> None: """ Shutdown the FaaS system. - Closes connections, stops local instances, and updates the cache. + This should release any acquired resources, stop any running local services + (like Docker containers started by SeBS for CLI interactions), and update + the cache with the final system configuration. This should be called when the system is no longer needed. """ try: diff --git a/sebs/gcp/__init__.py b/sebs/gcp/__init__.py index 9e5b0f3d..ad3f3f69 100644 --- a/sebs/gcp/__init__.py +++ b/sebs/gcp/__init__.py @@ -1,7 +1,7 @@ """Google Cloud Platform (GCP) integration for SeBS. -This package provides comprehensive Google Cloud Platform support for the -Serverless Benchmarking Suite, including Cloud Functions deployment, Cloud Storage +This package provides comprehensive Google Cloud Platform support, +including Cloud Functions deployment, Cloud Storage for object storage, Firestore/Datastore for NoSQL operations, and Cloud Monitoring for performance metrics collection. @@ -11,7 +11,7 @@ - NoSQL database operations using Firestore in Datastore mode - Performance monitoring via Cloud Monitoring and Cloud Logging - Docker-based gcloud CLI integration for administrative operations -- Comprehensive credential and resource management +- Credential and resource management Modules: gcp: Main GCP system implementation diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index 96fa9c32..a38b43d0 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -1,8 +1,8 @@ """Google Cloud CLI integration for SeBS. -This module provides a Docker-based Google Cloud CLI interface for performing -administrative operations that require the gcloud command-line tool. It manages -a containerized gcloud environment with proper authentication and project setup. +This module provides a Docker-based Google Cloud CLI interface. +Currently, we use it mostly to allocate and manage Datastore accounts. +There's no API or Python library for that. Classes: GCloudCLI: Docker-based gcloud CLI interface for GCP operations @@ -54,6 +54,11 @@ def __init__( Sets up a Docker container with the gcloud CLI, pulling the image if needed and mounting the GCP credentials file for authentication. 
+ Initialize GCloudCLI and start the Docker container. + Pulls the gcloud CLI Docker image if not found locally, then runs a + container in detached mode with credentials mounted. + + Args: credentials: GCP credentials with service account file path system_config: SeBS system configuration @@ -94,13 +99,6 @@ def __init__( tty=True, ) self.logging.info(f"Started gcloud CLI container: {self.docker_instance.id}.") - # while True: - # try: - # dkg = self.docker_instance.logs(stream=True, follow=True) - # next(dkg).decode("utf-8") - # break - # except StopIteration: - # pass def execute(self, cmd: str) -> bytes: """Execute a command in the gcloud CLI container. @@ -126,9 +124,14 @@ def execute(self, cmd: str) -> bytes: def login(self, project_name: str) -> None: """Authenticate gcloud CLI and set the active project. - Performs service account authentication using the mounted credentials file - and sets the specified project as the active project. Automatically confirms - any prompts that may appear during project setup. + Authenticates using the mounted credentials file (`/credentials.json` in + the container) and then sets the active Google Cloud project. + Automatically confirms any prompts that may appear during project setup. + Important: + - `gcloud init` is not used as it requires browser-based authentication. + Instead, we authenticate as a service account. + - Setting the project might show warnings about Cloud Resource Manager API + permissions, which are generally not needed for SeBS operations. Args: project_name: GCP project ID to set as active diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 873735eb..8035ef39 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,15 +1,10 @@ """Configuration classes for Google Cloud Platform (GCP) integration. -This module provides configuration classes for GCP serverless benchmarking, +This module provides configuration classes for GCP, including credentials management, resource allocation, and cloud region configuration. It handles authentication through service account JSON files and manages project-specific settings required for Cloud Functions deployment and execution. -The module supports multiple credential sources in priority order: -1. User-provided credentials in configuration -2. Cached credentials from previous sessions -3. Environment variables (GOOGLE_APPLICATION_CREDENTIALS, GCP_SECRET_APPLICATION_CREDENTIALS) - Classes: GCPCredentials: Handles authentication and project identification GCPResources: Manages allocated cloud resources @@ -31,9 +26,6 @@ from sebs.faas.config import Config, Credentials, Resources from sebs.utils import LoggingHandlers -# FIXME: Replace type hints for static generators after migration to 3.7 -# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel - class GCPCredentials(Credentials): """Credentials manager for Google Cloud Platform authentication. @@ -44,9 +36,8 @@ class GCPCredentials(Credentials): The class supports multiple credential sources in priority order: 1. User-provided credentials file path - 2. Cached credentials from previous sessions - 3. GOOGLE_APPLICATION_CREDENTIALS environment variable - 4. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + 2. GOOGLE_APPLICATION_CREDENTIALS environment variable + 3. 
GCP_SECRET_APPLICATION_CREDENTIALS environment variable Attributes: _gcp_credentials: Path to the service account JSON file @@ -107,9 +98,11 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Creden Loads credentials from multiple sources in priority order: 1. User-provided config with credentials-json path - 2. Cached credentials from previous sessions - 3. GOOGLE_APPLICATION_CREDENTIALS environment variable - 4. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + 2. GOOGLE_APPLICATION_CREDENTIALS environment variable + 3. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + + Sets the `GOOGLE_APPLICATION_CREDENTIALS` environment variable if credentials + are loaded from SeBS config or SeBS-specific environment variables. Args: config: Configuration dictionary potentially containing credentials @@ -166,6 +159,9 @@ def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Creden def serialize(self) -> Dict: """Serialize credentials to dictionary for cache storage. + Only stores the project_id, as the path to credentials might change or be + environment-dependent. It also avoids any potential security issues. + Returns: Dictionary containing project_id for cache storage """ @@ -173,7 +169,7 @@ def serialize(self) -> Dict: return out def update_cache(self, cache: Cache) -> None: - """Update the cache with current credential information. + """Update the cache with current GCP project id. Args: cache: Cache instance to update with project ID @@ -182,15 +178,10 @@ def update_cache(self, cache: Cache) -> None: class GCPResources(Resources): - """Resource manager for Google Cloud Platform serverless resources. - - Manages cloud resources allocated for function execution and deployment, - such as IAM roles, API gateways for HTTP triggers, and other GCP-specific - infrastructure components. Storage resources are handled separately. + """Resource manager for serverless resources on Google Cloud Platform. - This class extends the base Resources class with GCP-specific resource - management capabilities and handles serialization/deserialization for - cache persistence. + Currently, this class primarily inherits functionality from the base `Resources` + class, as we do not need more GCP-specific resources beyond standard storage buckets. Attributes: Inherits all attributes from the base Resources class diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index 7c511527..f91a5998 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -3,6 +3,8 @@ This module provides NoSQL database functionality using Google Cloud Firestore in Datastore mode. It manages database allocation, table creation, and data operations for benchmarks requiring NoSQL storage capabilities. +To create databases, we use the gcloud CLI instance since there is no API +that we could access directly. Classes: BenchmarkResources: Resource configuration for benchmark databases @@ -132,6 +134,9 @@ def get_tables(self, benchmark: str) -> Dict[str, str]: def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: """Get the actual table name for a benchmark table. + In Datastore's case, the table alias is the kind name if it's registered + for the benchmark. + Args: benchmark: Name of the benchmark table: Logical table name @@ -239,8 +244,11 @@ def create_table( ) -> str: """Create a new entity kind (table) in Datastore. - Creates a new Firestore database in Datastore mode if needed, and adds - the specified entity kind to the benchmark's resource configuration. 
+    Creates a new Firestore database in Datastore mode if needed, using the gcloud CLI.
+    Datastore kinds are schemaless and created implicitly when an entity of that
+    kind is first written. This method primarily ensures the database exists and
+    registers the kind name for the benchmark. The `primary_key` is recorded but
+    not used to create a schema for the kind itself.

         Args:
             benchmark: Name of the benchmark
diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py
index ee56507d..d4e2eedd 100644
--- a/sebs/gcp/function.py
+++ b/sebs/gcp/function.py
@@ -62,6 +62,7 @@ def typename() -> str:
     def serialize(self) -> Dict:
         """Serialize function to dictionary for cache storage.
+        Adds the code bucket in cloud storage.

         Returns:
             Dictionary containing function state including bucket information
diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py
index 64709ae3..9ed9b197 100644
--- a/sebs/gcp/gcp.py
+++ b/sebs/gcp/gcp.py
@@ -1,9 +1,8 @@
 """Google Cloud Platform (GCP) serverless system implementation.

-This module provides the main GCP implementation for the Serverless Benchmarking Suite,
-including function deployment, management, monitoring, and resource allocation.
-It integrates with Google Cloud Functions, Cloud Storage, Cloud Monitoring, and
-Cloud Logging to provide comprehensive serverless benchmarking capabilities.
+This module provides the main GCP implementation with function deployment, management,
+monitoring, and resource allocation. It integrates with Google Cloud Functions,
+Cloud Storage, Cloud Monitoring, and Cloud Logging.

 The module handles:
 - Function creation, updating, and lifecycle management
@@ -138,6 +137,8 @@ def initialize(

         Sets up the Cloud Functions API client and initializes system resources
         including storage buckets and other required infrastructure.
+        After this call, the GCP system should be ready to allocate functions,
+        manage storage, and invoke functions.

         Args:
             config: Additional system-specific configuration parameters
@@ -149,6 +150,8 @@ def initialize(
     def get_function_client(self):
         """Get the Google Cloud Functions API client.

+        The client is initialized during the `initialize` call.
+
         Returns:
             Initialized Cloud Functions API client
         """
@@ -271,8 +274,8 @@ def package_code(
         Note that the function GCP.recursive_zip is slower than the use of e.g.
         `utils.execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True)`
         or `shutil.make_archive(benchmark_archive, direcory, directory)`
-        But both of the two alternatives need a chance of directory
-        (shutil.make_archive does the directorychange internaly)
+        But both of the two alternatives need a change of directory
+        (shutil.make_archive does the directory change internally)
         which leads to a "race condition" when running several benchmarks in parallel,
         since a change of the current directory is NOT Thread specfic.
@@ -300,7 +303,8 @@ def create_function(

         Deploys a benchmark as a Cloud Function, handling code upload to Cloud Storage,
         function creation with proper configuration, and IAM policy setup for
-        unauthenticated invocations. If the function already exists, updates it instead.
+        unauthenticated invocations (HTTP triggers).
+        If the function already exists, updates it instead.

         Args:
             code_package: Benchmark package with code and configuration
@@ -440,6 +444,8 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType)

         Creates HTTP triggers for Cloud Functions, waiting for function deployment
         to complete before extracting the trigger URL.
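As a side note to the packaging remark above: a chdir-based zip is unsafe in parallel runs because the working directory is shared by all threads of a process. A chdir-free recursive zip can be written with the standard `zipfile` module; the sketch below only illustrates the idea and is not the `GCP.recursive_zip` implementation.

```python
import os
import zipfile


def zip_directory(directory: str, archive_path: str) -> None:
    """Recursively zip `directory` without changing the process working directory.

    Storing paths relative to `directory` keeps the archive layout identical to a
    `cd directory && zip -r` invocation, but stays safe when several benchmarks
    are packaged in parallel threads.
    """
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for root, _, files in os.walk(directory):
            for name in files:
                full_path = os.path.join(root, name)
                archive.write(full_path, arcname=os.path.relpath(full_path, directory))
```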
+ Only HTTP triggers are supported here; Library triggers are added by + default during function creation. Args: function: Function instance to create trigger for @@ -973,8 +979,8 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: - """Check if a function is deployed and optionally verify version. + """Check if a function is deployed and optionally verify its version. Args: func_name: Name of the function to check versionId: Optional specific version ID to verify (-1 to check any) diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index c4234f6b..a54b9e5a 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -2,8 +2,7 @@ This module provides the GCPSystemResources class that manages all GCP resources required for serverless benchmarking, including storage, NoSQL databases, and -CLI tools. It coordinates resource allocation and provides unified access to -GCP services. +CLI tools. Classes: GCPSystemResources: Main resource manager for GCP services @@ -33,8 +32,6 @@ class GCPSystemResources(SystemResources): """System resource manager for Google Cloud Platform services. - Manages and provides access to all GCP services required for serverless - benchmarking including Cloud Storage, Firestore/Datastore, and gcloud CLI. Handles resource initialization, configuration, and cleanup. Attributes: diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 3d0a5e86..aed3120a 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -29,9 +29,8 @@ class GCPStorage(PersistentStorage): - """Google Cloud Storage implementation for SeBS persistent storage. + """Google Cloud Storage implementation providing persistent storage. - Provides object storage capabilities using Google Cloud Storage buckets. Handles bucket creation, file operations, and storage resource management for benchmarks, deployment artifacts, and experiment outputs. @@ -60,10 +59,12 @@ def deployment_name() -> str: @property def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files in buckets.""" return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set the flag for replacing existing files.""" self._replace_existing = val def __init__( @@ -84,6 +85,7 @@ def __init__( def correct_name(self, name: str) -> str: """Correct bucket name to meet GCP naming requirements. + Currently it does nothing - no special requirements on GCP. Args: name: Original bucket name @@ -98,6 +100,10 @@ def _create_bucket( ) -> str: """Create a new Cloud Storage bucket or return existing one. + Checks if a bucket with a similar name (if `name` is a prefix) already exists + in the provided `buckets` list. If `randomize_name` is True, appends a + random string to make the name unique. + Args: name: Base name for the bucket buckets: List of existing bucket names to check @@ -161,6 +167,10 @@ def upload(self, bucket_name: str, filepath: str, key: str) -> None: def exists_bucket(self, bucket_name: str) -> bool: """Check if a Cloud Storage bucket exists. + Handles `exceptions.Forbidden` which can occur if the bucket exists + but is not accessible by the current credentials (treated as not existing + for SeBS purposes). 
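The `Forbidden` handling described in the `exists_bucket` docstring above can be sketched with the `google-cloud-storage` client as follows; this is an illustrative snippet (assuming an already authenticated `storage.Client`), not the exact SeBS code.

```python
from google.cloud import storage
from google.cloud.exceptions import Forbidden


def exists_bucket_sketch(client: storage.Client, bucket_name: str) -> bool:
    # A 403 means the name is taken by a bucket we cannot access;
    # for benchmarking purposes it is treated the same as "not available to us".
    try:
        return client.bucket(bucket_name).exists()
    except Forbidden:
        return False
```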
+ Args: bucket_name: Name of the bucket to check @@ -229,6 +239,8 @@ def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: Uploads a file to the appropriate benchmark bucket, respecting cache settings and replace_existing configuration. + This is primarily used by benchmarks to upload input data. + Args: path_idx: Index of the input path prefix key: Object key for the uploaded file diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 3a4924c6..744cece7 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -146,6 +146,10 @@ def sync_invoke(self, payload: Dict) -> ExecutionResult: def async_invoke(self, payload: Dict): """Asynchronously invoke the Cloud Function. + Note: This method is not currently implemented for GCP's LibraryTrigger. + GCP's `functions.call` API is synchronous. Asynchronous behavior could + need to be implemented using a thread pool or similar mechanism if desired. + Args: payload: Input data to send to the function diff --git a/sebs/local/__init__.py b/sebs/local/__init__.py index 4e71f98a..5bf08d0d 100644 --- a/sebs/local/__init__.py +++ b/sebs/local/__init__.py @@ -1,9 +1,8 @@ """SeBS local execution platform module. -This module provides the local execution platform for the Serverless Benchmarking Suite. -It enables running serverless functions locally using Docker containers, providing a -development and testing environment that mimics serverless execution without requiring -cloud platform deployment. +This module provides the local execution platform by running serverless functions +locally using Docker containers, providing a development and testing environment +that mimics serverless execution without requiring cloud platform deployment. Key components: - Local: Main system class for local function execution @@ -11,7 +10,7 @@ - Deployment: Manages deployments and memory measurements for local functions The local platform supports HTTP triggers and provides memory profiling capabilities -for performance analysis. +for performance analysis. It can also be integrated with local object and NoSQL storage. """ from .local import Local # noqa diff --git a/sebs/local/config.py b/sebs/local/config.py index 705b7f29..4cfa9795 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -51,12 +51,6 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return LocalCredentials() -""" - No need to cache and store - we prepare the benchmark and finish. - The rest is used later by the user. -""" - - class LocalResources(SelfHostedResources): """Resource management for local execution platform. @@ -64,6 +58,8 @@ class LocalResources(SelfHostedResources): for Docker containers and storage configurations. Tracks allocated ports to avoid conflicts when running multiple functions. + In local deployments, caching and storing resource details is minimal. + Attributes: _path: Path for local resource storage _allocated_ports: Set of ports currently allocated to containers @@ -161,9 +157,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class LocalConfig(Config): """Configuration class for local execution platform. - Provides the main configuration interface for the local platform, - combining credentials and resources. The local platform requires - minimal configuration since it runs functions locally. + No extra configuration - just implementation of the required interfaces. 
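Returning to the `LibraryTrigger.async_invoke` note earlier in this hunk: since GCP's `functions.call` API is synchronous, one possible (hypothetical) way to provide asynchronous behavior is to wrap the synchronous call in a thread pool, as sketched below.

```python
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Callable, Dict

_pool = ThreadPoolExecutor(max_workers=4)


def async_invoke_sketch(sync_invoke: Callable[[Dict], object], payload: Dict) -> Future:
    """Run a synchronous invocation in a worker thread so callers get a Future immediately."""
    return _pool.submit(sync_invoke, payload)
```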
Attributes: _credentials: Local credentials instance (empty) diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index 76895e6e..86365297 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -4,13 +4,6 @@ including memory measurement collection, function lifecycle management, and resource cleanup. -The Deployment class handles: -- Function container management -- Memory measurement process coordination -- Input/output serialization -- Storage configuration -- Resource cleanup and shutdown - Classes: Deployment: Main deployment management class for local functions """ @@ -31,10 +24,6 @@ class Deployment(LoggingBase): """Manages local function deployments and memory measurements. - Coordinates the lifecycle of locally deployed functions, including container - management, memory measurement collection, and resource cleanup. Handles - serialization of deployment state for persistence and recovery. - Attributes: _functions: List of deployed local functions _storage: Optional Minio storage instance @@ -73,6 +62,8 @@ def __init__(self): def add_function(self, func: LocalFunction) -> None: """Add a function to the deployment. + If the function has a memory measurement PID, it's also recorded. + Args: func: Local function to add to the deployment """ @@ -99,6 +90,8 @@ def set_storage(self, storage: Minio) -> None: def serialize(self, path: str) -> None: """Serialize deployment configuration to file. + Includes details about functions, storage, inputs, and memory measurements. + Args: path: File path to write serialized deployment configuration """ diff --git a/sebs/local/function.py b/sebs/local/function.py index 11546517..f141f58a 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -1,8 +1,6 @@ """Function and trigger implementations for local execution platform. -This module provides classes for managing functions and triggers in the local -execution environment. Functions run as Docker containers with HTTP triggers -for invocation. +Functions run as Docker containers with HTTP triggers for invocation. Classes: HTTPTrigger: HTTP-based trigger for local function invocation @@ -105,8 +103,7 @@ class LocalFunction(Function): """Function implementation for local execution platform. Represents a serverless function running locally in a Docker container. - Handles container management, URL resolution, and memory measurement - process tracking. + Handles container management and URL resolution. Attributes: _instance: Docker container running the function @@ -128,6 +125,8 @@ def __init__( ): """Initialize local function. + Determines the invocation URL based on the Docker container's network settings. + Args: docker_container: Docker container instance running the function port: Port number the function is listening on diff --git a/sebs/local/local.py b/sebs/local/local.py index 415793c1..643918a0 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -1,7 +1,6 @@ """Local execution platform for SeBS. -This module implements the local execution platform for the Serverless Benchmarking -Suite. It runs serverless functions locally using Docker containers, providing a +It runs serverless functions locally using Docker containers, providing a development and testing environment that mimics serverless execution without requiring cloud platform deployment. @@ -41,16 +40,6 @@ class Local(System): """Local execution platform implementation. - Implements the System interface for running serverless functions locally using - Docker containers. 
Provides development and testing capabilities without requiring - cloud platform deployment. - - This platform supports: - - HTTP-triggered function execution - - Memory measurement and profiling - - Multi-container deployment - - Cross-platform operation (Linux, macOS, Windows) - Attributes: DEFAULT_PORT: Default port number for function containers (9000) _config: Local platform configuration @@ -401,7 +390,7 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LocalFunction": - """Create a new function deployment. + """Create a new function deployment. In practice, it starts a new Docker container. Args: code_package: Benchmark code package to deploy diff --git a/sebs/local/measureMem.py b/sebs/local/measureMem.py index d6444883..691a54be 100644 --- a/sebs/local/measureMem.py +++ b/sebs/local/measureMem.py @@ -1,8 +1,8 @@ """Memory measurement utility for Docker containers. -This module provides functionality to measure memory consumption of Docker containers -running local serverless functions. It reads memory usage from the cgroup filesystem -at regular intervals and writes the measurements to a file for later analysis. +This script periodically reads the `memory.current` file from the container's +cgroup to record its memory usage. The measurements +are appended to a specified output file. The measurement process: 1. Reads memory.current from the container's cgroup diff --git a/sebs/openwhisk/__init__.py b/sebs/openwhisk/__init__.py index 572c592c..b25f6488 100644 --- a/sebs/openwhisk/__init__.py +++ b/sebs/openwhisk/__init__.py @@ -1,11 +1,7 @@ """Apache OpenWhisk integration module for SeBS. -This module provides the complete OpenWhisk integration for the Serverless -Benchmarking Suite (SeBS), including platform-specific implementations for -function deployment, configuration management, and execution. - -The module includes: -- OpenWhisk system integration and function management +This module provides the complete OpenWhisk integration: +- OpenWhisk system and function management - Configuration classes for credentials and resources - Function and trigger implementations - Docker container management diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index 487debfa..90a7cac0 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -1,8 +1,7 @@ """ Configuration management for Apache OpenWhisk deployments in SeBS. -This module provides configuration classes for managing OpenWhisk-specific settings, -credentials, and resources. It handles Docker registry configuration, storage settings, +It handles Docker registry configuration, storage settings, and deployment parameters for OpenWhisk serverless functions. Classes: @@ -25,8 +24,8 @@ class OpenWhiskCredentials(Credentials): """ Manages authentication credentials for OpenWhisk deployments. - This class handles credential serialization and deserialization for OpenWhisk - platforms. Currently implements a minimal credential system. + Since we do not use extra credentials there, it just implements + the expected interface. Note: OpenWhisk deployments typically rely on local authentication through @@ -62,9 +61,7 @@ class OpenWhiskResources(SelfHostedResources): """ Manages Docker registry and storage resources for OpenWhisk deployments. - This class handles configuration of Docker registries, authentication credentials, - and storage resources needed for OpenWhisk function deployments. It supports - both user-provided and cached configurations. 
+ This class handles configuration of Docker registry. Attributes: _docker_registry: Docker registry URL for storing function images @@ -269,9 +266,8 @@ class OpenWhiskConfig(Config): """ Main configuration class for OpenWhisk deployments. - This class manages all OpenWhisk-specific configuration settings including - cluster management, WSK CLI settings, and experimental features. It integrates - with the broader SeBS configuration system. + This class focuses on OpenWhisk-specific configuration settings: + cluster management, WSK CLI settings, and experimental features. Attributes: name: Platform name identifier ('openwhisk') diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index d6e35108..a684c0db 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -1,8 +1,8 @@ """Docker container management for OpenWhisk functions in SeBS. -This module provides OpenWhisk-specific Docker container management functionality, -handling Docker image registry configuration, image tagging, and repository naming -for OpenWhisk function deployments. +Its primary focus is supporting both DockerHub and custom, local Docker registries. +The latter make development and prototyping much faster and easier. +They also allow users to push new images. Classes: OpenWhiskContainer: OpenWhisk-specific Docker container management @@ -20,11 +20,6 @@ class OpenWhiskContainer(DockerContainer): """ OpenWhisk-specific Docker container management. - This class extends the base DockerContainer to provide OpenWhisk-specific - functionality for managing Docker images, registries, and container deployment. - It handles Docker registry authentication and image URI generation for - OpenWhisk function deployments. - Attributes: config: OpenWhisk configuration containing registry settings @@ -98,13 +93,6 @@ def registry_name( - Full repository name with registry prefix - Image tag - Complete image URI - - Example: - >>> registry, repo, tag, uri = container.registry_name( - ... "test", "python", "3.8", "x86_64" - ... ) - >>> # Returns: ("Docker Hub", "sebs", "openwhisk-test-python-3.8-x86_64", - ... "sebs:openwhisk-test-python-3.8-x86_64") """ registry_name = self.config.resources.docker_registry diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py index 8c9b7c38..d0f0a211 100644 --- a/sebs/openwhisk/function.py +++ b/sebs/openwhisk/function.py @@ -1,9 +1,5 @@ """OpenWhisk function and configuration classes for SeBS. -This module provides OpenWhisk-specific implementations of function configuration -and function management for the SeBS benchmarking framework. It handles function -configuration serialization, Docker image management, and storage integration. - Classes: OpenWhiskFunctionConfig: Configuration data class for OpenWhisk functions OpenWhiskFunction: OpenWhisk-specific function implementation @@ -36,10 +32,10 @@ class OpenWhiskFunctionConfig(FunctionConfig): Note: The docker_image attribute should be merged with higher-level - image abstraction in future refactoring. + image abstraction in future refactoring. This is quite similar + to AWS deployments. """ - # FIXME: merge with higher level abstraction for images docker_image: str = "" namespace: str = "_" object_storage: Optional[MinioConfig] = None @@ -92,9 +88,8 @@ class OpenWhiskFunction(Function): """ OpenWhisk-specific function implementation for SeBS. - This class provides OpenWhisk-specific function management including - configuration handling, serialization, and trigger management. 
It integrates - with OpenWhisk actions and maintains Docker image information. + It does not implemnet anything non-standard, just implements + trigger and config types specific to OpenWhisk. Attributes: _cfg: OpenWhisk-specific function configuration @@ -105,7 +100,11 @@ class OpenWhiskFunction(Function): """ def __init__( - self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig + self, + name: str, + benchmark: str, + code_package_hash: str, + cfg: OpenWhiskFunctionConfig, ) -> None: """ Initialize OpenWhisk function. @@ -167,7 +166,10 @@ def deserialize(cached_config: Dict[str, Any]) -> OpenWhiskFunction: cfg = OpenWhiskFunctionConfig.deserialize(cached_config["config"]) ret = OpenWhiskFunction( - cached_config["name"], cached_config["benchmark"], cached_config["hash"], cfg + cached_config["name"], + cached_config["benchmark"], + cached_config["hash"], + cfg, ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 29a51178..a32357c6 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -4,10 +4,6 @@ This module provides the main OpenWhisk system class that integrates OpenWhisk serverless platform with the SeBS benchmarking framework. It handles function deployment, execution, monitoring, and resource management for OpenWhisk clusters. - -The implementation supports both local and remote OpenWhisk deployments, -Docker-based function packaging, and various trigger types including HTTP -and library-based invocations. """ import os @@ -38,8 +34,10 @@ class OpenWhisk(System): This class provides the main integration between SeBS and Apache OpenWhisk, handling function deployment, execution, container management, and resource - allocation. It supports both local and remote OpenWhisk deployments with - Docker-based function packaging. + management (primarily self-hosted storage like Minio/ScyllaDB via SelfHostedSystemResources), + and interaction with the `wsk` CLI. + It supports OpenWhisk deployments with Docker-based function packaging. + We do not use code packages due to low package size limits. Attributes: _config: OpenWhisk-specific configuration settings @@ -63,6 +61,7 @@ def __init__( ) -> None: """ Initialize OpenWhisk system with configuration and clients. + Will log in to Docker registry. Args: system_config: Global SeBS system configuration @@ -193,7 +192,9 @@ def package_code( Creates both a Docker image and a ZIP archive containing the benchmark code. The ZIP archive is required for OpenWhisk function registration even when - using Docker-based deployment. + using Docker-based deployment. It contains only the main handlers + (`__main__.py` or `index.js`). The Docker image URI is returned, + which will be used when creating the action. Args: directory: Path to the benchmark code directory diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py index 03956565..de8bcb38 100644 --- a/sebs/openwhisk/triggers.py +++ b/sebs/openwhisk/triggers.py @@ -1,8 +1,7 @@ """Trigger implementations for OpenWhisk function invocation in SeBS. This module provides different trigger types for invoking OpenWhisk functions, -including library-based (CLI) triggers and HTTP-based triggers. Each trigger -type handles the specific invocation method and result parsing for OpenWhisk. +including library-based (CLI) triggers and HTTP-based triggers. 
Classes: LibraryTrigger: CLI-based function invocation using wsk tool diff --git a/sebs/regression.py b/sebs/regression.py index a4fc6114..6660e263 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -50,20 +50,20 @@ benchmarks_nodejs = ["110.dynamic-html", "120.uploader", "210.thumbnailer"] # AWS-specific configurations -architectures_aws = ["x64", "arm64"] # Supported architectures -deployments_aws = ["package", "container"] # Deployment types +architectures_aws = ["x64", "arm64"] +deployments_aws = ["package", "container"] # GCP-specific configurations -architectures_gcp = ["x64"] # Supported architectures -deployments_gcp = ["package"] # Deployment types +architectures_gcp = ["x64"] +deployments_gcp = ["package"] # Azure-specific configurations -architectures_azure = ["x64"] # Supported architectures -deployments_azure = ["package"] # Deployment types +architectures_azure = ["x64"] +deployments_azure = ["package"] # OpenWhisk-specific configurations -architectures_openwhisk = ["x64"] # Supported architectures -deployments_openwhisk = ["container"] # Deployment types +architectures_openwhisk = ["x64"] +deployments_openwhisk = ["container"] # User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None @@ -263,10 +263,6 @@ class AWSTestSequencePython( ): """Test suite for Python benchmarks on AWS Lambda. - This test class runs all Python benchmarks on AWS Lambda, - using various architectures (x64, arm64) and deployment types - (package, container). Each test uses both library and HTTP triggers. - Attributes: benchmarks: List of Python benchmarks to test architectures: List of AWS architectures to test (x64, arm64) @@ -325,10 +321,6 @@ class AWSTestSequenceNodejs( ): """Test suite for Node.js benchmarks on AWS Lambda. - This test class runs all Node.js benchmarks on AWS Lambda, - using various architectures (x64, arm64) and deployment types - (package, container). Each test uses both library and HTTP triggers. - Attributes: benchmarks: List of Node.js benchmarks to test architectures: List of AWS architectures to test (x64, arm64) @@ -378,10 +370,6 @@ class AzureTestSequencePython( ): """Test suite for Python benchmarks on Azure Functions. - This test class runs all Python benchmarks on Azure Functions, - using x64 architecture and package deployment. Each test uses - HTTP triggers. - Attributes: benchmarks: List of Python benchmarks to test architectures: List of Azure architectures to test (x64) @@ -457,10 +445,6 @@ class AzureTestSequenceNodejs( ): """Test suite for Node.js benchmarks on Azure Functions. - This test class runs all Node.js benchmarks on Azure Functions, - using x64 architecture and package deployment. Each test uses - HTTP triggers. - Attributes: benchmarks: List of Node.js benchmarks to test architectures: List of Azure architectures to test (x64) @@ -531,10 +515,6 @@ class GCPTestSequencePython( ): """Test suite for Python benchmarks on Google Cloud Functions. - This test class runs all Python benchmarks on Google Cloud Functions, - using x64 architecture and package deployment. Each test uses - HTTP triggers. - Attributes: benchmarks: List of Python benchmarks to test architectures: List of GCP architectures to test (x64) @@ -584,10 +564,6 @@ class GCPTestSequenceNodejs( ): """Test suite for Node.js benchmarks on Google Cloud Functions. - This test class runs all Node.js benchmarks on Google Cloud Functions, - using x64 architecture and package deployment. Each test uses - HTTP triggers. 
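The per-platform architecture and deployment lists above expand into a test matrix. The snippet below is a generic illustration of such an expansion with `itertools.product`; the trigger list is an assumption, and this is not the suite's actual test-generation code.

```python
import itertools

benchmarks = ["110.dynamic-html", "120.uploader", "210.thumbnailer"]
architectures_aws = ["x64", "arm64"]
deployments_aws = ["package", "container"]
triggers = ["library", "http"]  # assumed trigger types for illustration

# Every combination becomes one regression test case.
test_cases = list(itertools.product(benchmarks, architectures_aws, deployments_aws, triggers))
print(len(test_cases))  # 3 * 2 * 2 * 2 = 24 combinations
```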
- Attributes: benchmarks: List of Node.js benchmarks to test architectures: List of GCP architectures to test (x64) @@ -637,10 +613,6 @@ class OpenWhiskTestSequencePython( ): """Test suite for Python benchmarks on OpenWhisk. - This test class runs all Python benchmarks on OpenWhisk, - using x64 architecture and container deployment. Each test uses - HTTP triggers. - Attributes: benchmarks: List of Python benchmarks to test architectures: List of OpenWhisk architectures to test (x64) @@ -699,10 +671,6 @@ class OpenWhiskTestSequenceNodejs( ): """Test suite for Node.js benchmarks on OpenWhisk. - This test class runs all Node.js benchmarks on OpenWhisk, - using x64 architecture and container deployment. Each test uses - HTTP triggers. - Attributes: benchmarks: List of Node.js benchmarks to test architectures: List of OpenWhisk architectures to test (x64) diff --git a/sebs/sebs.py b/sebs/sebs.py index 907cca5b..993d652a 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -1,9 +1,6 @@ """Main SeBS (Serverless Benchmarking Suite) client implementation. -This module provides the main interface for the Serverless Benchmarking Suite, -offering a unified API for deploying, executing, and benchmarking serverless -functions across multiple cloud providers and locally. It manages: - +This module provides the main interface for SeBS: - Deployment client creation for different platforms (AWS, Azure, GCP, OpenWhisk, local) - Benchmark execution and configuration - Experiment setup and execution @@ -38,11 +35,6 @@ class SeBS(LoggingBase): """Main client for the Serverless Benchmarking Suite. - This class provides the primary interface for interacting with the benchmarking - suite. It manages deployment clients, benchmarks, experiments, and resources. - It handles caching, logging, and provides factory methods for creating the - various components needed for benchmarking. - Attributes: cache_client: Client for managing cached artifacts (code packages, etc.) docker_client: Docker client for container operations @@ -62,7 +54,7 @@ def cache_client(self) -> Cache: return self._cache_client @property - def docker_client(self) -> docker.client: + def docker_client(self) -> docker.client.DockerClient: """Get the Docker client. Returns: diff --git a/sebs/statistics.py b/sebs/statistics.py index 16804eea..01e3e385 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -39,9 +39,7 @@ def basic_stats(times: List[float]) -> BasicStats: def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: """Compute parametric confidence interval using Student's t-distribution. - This function computes a confidence interval for the mean of the given - measurement times using Student's t-distribution. This is a parametric - method that assumes the data follows a normal distribution. + This is a parametric method that assumes the data follows a normal distribution. Args: alpha: Confidence level (e.g., 0.95 for 95% confidence) @@ -57,15 +55,12 @@ def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: """Compute non-parametric confidence interval using Le Boudec's method. - This function computes a confidence interval for the median of the given - measurement times using the method described by Le Boudec. This is a - non-parametric method that does not assume any particular distribution - of the data. 
+ It requires a sufficient number of samples but it is a non-parametric + method that does not assume that data follows the normal distribution. Reference: - J.-Y. Le Boudec, "Methods for the Estimation of the Accuracy of - Measurements in Computer Performance Evaluation", - Performance Evaluation Review, 2010 + J.-Y. Le Boudec, "Performance Evaluation of Computer and + Communication Systems", 2010. Args: alpha: Confidence level (e.g., 0.95 for 95% confidence) diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py index d4346656..65dec561 100644 --- a/sebs/storage/__init__.py +++ b/sebs/storage/__init__.py @@ -1,8 +1,7 @@ -"""Storage module for the Serverless Benchmarking Suite (SeBS). - -This module provides storage abstractions and implementations for SeBS, supporting -both object storage (S3-compatible) and NoSQL database storage. It includes: +"""This module provides storage abstractions and implementations for SeBS, +supporting both object storage (S3-compatible) and NoSQL database storage. +It includes: - Configuration classes for different storage backends - MinIO implementation for local S3-compatible object storage - ScyllaDB implementation for local DynamoDB-compatible NoSQL storage @@ -10,6 +9,8 @@ The storage module enables benchmarks to work with persistent data storage across different deployment environments while maintaining consistent interfaces. +Thus, we can seamlessly port benchmarks between clouds and open-source +serverless platforms. Key Components: - config: Configuration dataclasses for storage backends diff --git a/sebs/storage/config.py b/sebs/storage/config.py index d6fca392..e68262de 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -1,16 +1,5 @@ """Configuration classes for storage backends in the Serverless Benchmarking Suite. -This module provides dataclass-based configuration objects for different storage -backends supported by SeBS. It includes abstract base classes that define the -interface for storage configurations, as well as concrete implementations for -specific storage systems. - -Key Classes: - PersistentStorageConfig: Abstract base for object storage configurations - MinioConfig: Configuration for MinIO S3-compatible object storage - NoSQLStorageConfig: Abstract base for NoSQL database configurations - ScyllaDBConfig: Configuration for ScyllaDB DynamoDB-compatible storage - All configuration classes support serialization/deserialization for caching and provide environment variable mappings for runtime configuration. """ @@ -30,6 +19,8 @@ class PersistentStorageConfig(ABC): must implement. It provides methods for serialization and environment variable generation that are used for caching and runtime configuration. + This is used by MinioStorage in different deployments. + Subclasses must implement: - serialize(): Convert configuration to dictionary for caching - envs(): Generate environment variables for benchmark runtime @@ -56,7 +47,7 @@ def envs(self) -> Dict[str, str]: @dataclass class MinioConfig(PersistentStorageConfig): - """Configuration for MinIO S3-compatible object storage. + """Configuration for MinIO object storage. MinIO provides a local S3-compatible object storage service that runs in a Docker container. This configuration class stores all the necessary @@ -155,6 +146,9 @@ class NoSQLStorageConfig(ABC): must implement. It provides serialization methods used for caching and configuration management. + This class will be overidden by specific implementations for different + FaaS systems. 
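For reference, the non-parametric median confidence interval described in the `ci_le_boudec` docstring above can be sketched as follows. It uses the textbook construction from Le Boudec (order statistics with a normal approximation of the binomial ranks); treat it as an illustration rather than the exact SeBS implementation.

```python
import math
from typing import List, Tuple

from scipy.stats import norm


def ci_le_boudec_sketch(confidence: float, times: List[float]) -> Tuple[float, float]:
    """Non-parametric confidence interval for the median (Le Boudec, 2010)."""
    data = sorted(times)
    n = len(data)
    z = norm.ppf(1.0 - (1.0 - confidence) / 2.0)  # e.g. ~1.96 for 95% confidence
    # 1-based ranks of the lower/upper order statistics, clamped to the valid range.
    low = max(int(math.floor((n - z * math.sqrt(n)) / 2.0)), 1)
    high = min(int(math.ceil((n + z * math.sqrt(n)) / 2.0)) + 1, n)
    return data[low - 1], data[high - 1]
```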
+ Subclasses must implement: - serialize(): Convert configuration to dictionary for caching """ diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index 54b2665a..0ceb0052 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -1,10 +1,9 @@ """ Module for MinIO S3-compatible storage in the Serverless Benchmarking Suite. -This module implements local object storage using MinIO, which provides an -S3-compatible API. MinIO runs in a Docker container and provides persistent +MinIO runs in a Docker container and provides persistent storage for benchmark data and results. It is primarily used for local -testing and development of S3-dependent serverless functions. +testing and on cloud platforms with no object storage, e.g., OpenWhisk. """ import copy @@ -27,11 +26,9 @@ class Minio(PersistentStorage): """ - S3-compatible object storage implementation using MinIO. - - This class manages a MinIO storage instance running in a Docker container, - providing S3-compatible object storage for local benchmarking. It handles - bucket creation, file uploads/downloads, and container lifecycle management. + This class manages a self-hosted MinIO storage instance running + in a Docker container. It handles bucket creation, file uploads/downloads, + and container lifecycle management. Attributes: config: MinIO configuration settings @@ -194,8 +191,9 @@ def configure_connection(self) -> None: Configure the connection to the MinIO container. Determines the appropriate address to connect to the MinIO container - based on the host platform. For Linux, it uses the container's IP address, - while for Windows, macOS, or WSL it uses localhost with the mapped port. + based on the host platform. For Linux, it uses the container's + bridge IP address, hile for Windows, macOS, or WSL it uses + localhost with the mapped port. Raises: RuntimeError: If the MinIO container is not available or if the IP address @@ -500,15 +498,6 @@ def serialize(self) -> Dict[str, Any]: """ return self._cfg.serialize() - """ - Deserialization and inheritance support - - This implementation supports overriding this class. The main Minio class - is used to start/stop deployments. When overriding the implementation in - Local/OpenWhisk/..., we call the _deserialize method and provide an - alternative implementation type. - """ - T = TypeVar("T", bound="Minio") @staticmethod @@ -523,7 +512,12 @@ def _deserialize( Creates a new instance of the specified class type from cached configuration data. This allows platform-specific versions to be deserialized correctly - while sharing the core implementation. + while sharing the core implementation. When overriding the implementation in + Local/OpenWhisk/..., we call the _deserialize method and provide an + alternative implementation type. + + FIXME: is this still needed? It looks like we stopped using + platform-specific implementations. Args: cached_config: Cached MinIO configuration diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index 2704d4d6..866cc5cc 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -1,20 +1,11 @@ """Resource management for self-hosted storage deployments in SeBS. -This module provides resource management classes for self-hosted storage -deployments, including both object storage (MinIO) and NoSQL storage (ScyllaDB). -It handles configuration deserialization, container lifecycle management, and -provides unified interfaces for accessing storage services. 
+Its main responsibility is providing consistent interface and cache +behavior of self-hosted storage for the entire SeBS system. Key Classes: SelfHostedResources: Configuration management for self-hosted storage resources SelfHostedSystemResources: System-level resource management and service provisioning - -The module supports: - - MinIO for S3-compatible object storage - - ScyllaDB for DynamoDB-compatible NoSQL storage - - Configuration caching and deserialization - - Docker container lifecycle management - - Dynamic service discovery and connection configuration """ import docker @@ -39,11 +30,6 @@ class SelfHostedResources(Resources): """Resource configuration for self-hosted storage deployments. - This class manages configuration for self-hosted storage services, - including object storage (MinIO) and NoSQL storage (ScyllaDB). It provides - serialization, caching, and deserialization capabilities for storage - configurations. - Attributes: _object_storage: Configuration for object storage (MinIO) _nosql_storage: Configuration for NoSQL storage (ScyllaDB) @@ -205,11 +191,6 @@ def _deserialize( class SelfHostedSystemResources(SystemResources): """System-level resource management for self-hosted storage deployments. - This class manages the lifecycle and provisioning of self-hosted storage - services, including MinIO object storage and ScyllaDB NoSQL storage. It - handles container management, service initialization, and provides unified - access to storage services. - Attributes: _name: Name of the deployment _logging_handlers: Logging configuration handlers @@ -245,8 +226,8 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor """Get or create a persistent storage instance. Creates a MinIO storage instance if one doesn't exist, or returns the - existing instance. The storage is configured using the deployment's - storage configuration. + existing instance. The storage is deserialized from a serialized + config of an existing storage deployment. Args: replace_existing: Whether to replace existing buckets (optional) @@ -288,8 +269,9 @@ def get_nosql_storage(self) -> NoSQLStorage: """Get or create a NoSQL storage instance. Creates a ScyllaDB storage instance if one doesn't exist, or returns the - existing instance. The storage is configured using the deployment's - NoSQL storage configuration. + existing instance. The storage is deserialized from a serialized + config of an existing storage deployment. + Returns: NoSQLStorage: ScyllaDB storage instance diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index 512be703..7e311350 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -2,12 +2,8 @@ This module implements NoSQL database storage using ScyllaDB, which provides a DynamoDB-compatible API through its Alternator interface. ScyllaDB runs in a -Docker container and provides high-performance NoSQL storage for benchmark data -that requires DynamoDB-compatible operations. - -The implementation uses boto3 with ScyllaDB's Alternator API to provide seamless -compatibility with DynamoDB operations while running locally for development -and testing purposes. +Docker container, and the implementation uses boto3 while running locally +for development and testing purposes. """ import json @@ -85,6 +81,9 @@ def __init__( ): """Initialize a ScyllaDB storage instance. + It will initialize a boto3 client if the ScyllaDB + address is provided in the configuration. 
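The boto3 initialization mentioned above can be illustrated as follows. The endpoint address and the dummy credentials are placeholders (ScyllaDB's Alternator endpoint accepts arbitrary AWS credentials); this is a sketch rather than the exact SeBS constructor.

```python
import boto3


def make_alternator_client(address: str):
    """Create a DynamoDB-compatible boto3 client pointed at a local ScyllaDB Alternator endpoint."""
    return boto3.client(
        "dynamodb",
        endpoint_url=f"http://{address}",  # e.g. "172.17.0.2:8000" - placeholder address
        region_name="us-east-1",           # Alternator does not validate the region
        aws_access_key_id="None",          # dummy credentials accepted by Alternator
        aws_secret_access_key="None",
    )
```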
+ Args: docker_client: Docker client for managing the ScyllaDB container cache_client: Cache client for storing storage configuration @@ -244,7 +243,6 @@ def stop(self) -> None: """Stop the ScyllaDB container. Gracefully stops the running ScyllaDB container if it exists. - Logs an error if the container is not known. """ if self._storage_container is not None: self.logging.info(f"Stopping ScyllaDB container at {self._cfg.address}.") @@ -272,13 +270,6 @@ def serialize(self) -> Tuple[StorageType, Dict[str, Any]]: """ return StorageType.SCYLLADB, self._cfg.serialize() - # Deserialization and inheritance support - # - # This implementation supports overriding this class. The main ScyllaDB class - # is used to start/stop deployments. When overriding the implementation in - # Local/OpenWhisk/..., we call the _deserialize method and provide an - # alternative implementation type. - T = TypeVar("T", bound="ScyllaDB") @staticmethod @@ -291,6 +282,9 @@ def _deserialize( data. This allows platform-specific versions to be deserialized correctly while sharing the core implementation. + FIXME: is this still needed? It looks like we stopped using + platform-specific implementations. + Args: cached_config: Cached ScyllaDB configuration cache_client: Cache client @@ -447,6 +441,8 @@ def create_table( Note: Unlike cloud providers with hierarchical database structures, ScyllaDB requires unique table names at the cluster level. + Note: PAY_PER_REQUEST billing mode has no effect here. + Args: benchmark: Name of the benchmark name: Logical table name diff --git a/sebs/types.py b/sebs/types.py index 617c0d11..a6d3c38d 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -11,8 +11,8 @@ class BenchmarkModule(str, Enum): """Types of benchmark modules. - This enum defines the different types of benchmark modules that can - be used by benchmark functions: + Benchmark modules indicate which additional packages and configuration + are needed for the benchmark to work correctly. - STORAGE: Object storage module for storing and retrieving files - NOSQL: NoSQL database module for storing and retrieving structured data diff --git a/sebs/utils.py b/sebs/utils.py index 95fbab80..69aebdbd 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -75,6 +75,7 @@ def default(self, o): def serialize(obj) -> str: """ Serialize an object to a JSON string. + Applies `serialize` method when defined by the object. Args: obj: Object to serialize
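The behavior described in the final hunk (a JSON encoder whose `default` hook defers to an object's own `serialize` method) commonly looks like the sketch below; class and function names here are illustrative and not copied from `sebs.utils`.

```python
import json


class SerializeAwareEncoder(json.JSONEncoder):
    """JSON encoder that defers to an object's own serialize() method when present."""

    def default(self, o):
        if hasattr(o, "serialize"):
            return o.serialize()
        return super().default(o)


def serialize_sketch(obj) -> str:
    # Objects exposing serialize() are converted through it; everything else
    # falls back to the standard JSON encoding rules.
    return json.dumps(obj, cls=SerializeAwareEncoder, sort_keys=True, indent=2)
```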