diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..9b27f696 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,28 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: requirements.docs.txt + - requirements: requirements.txt + - requirements: requirements.aws.txt + - requirements: requirements.azure.txt + - requirements: requirements.gcp.txt + - requirements: requirements.local.txt + diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/api/modules.rst b/docs/source/api/modules.rst new file mode 100644 index 00000000..d4bd9f34 --- /dev/null +++ b/docs/source/api/modules.rst @@ -0,0 +1,7 @@ +sebs +==== + +.. toctree:: + :maxdepth: 4 + + sebs diff --git a/docs/source/api/sebs.aws.rst b/docs/source/api/sebs.aws.rst new file mode 100644 index 00000000..43486972 --- /dev/null +++ b/docs/source/api/sebs.aws.rst @@ -0,0 +1,89 @@ +sebs.aws package +================ + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.aws.aws module +------------------- + +.. automodule:: sebs.aws.aws + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.config module +---------------------- + +.. automodule:: sebs.aws.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.container module +------------------------- + +.. automodule:: sebs.aws.container + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.dynamodb module +------------------------ + +.. automodule:: sebs.aws.dynamodb + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.function module +------------------------ + +.. automodule:: sebs.aws.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.resources module +------------------------- + +.. automodule:: sebs.aws.resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.s3 module +------------------ + +.. automodule:: sebs.aws.s3 + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.aws.triggers module +------------------------ + +.. automodule:: sebs.aws.triggers + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. 
automodule:: sebs.aws + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.azure.rst b/docs/source/api/sebs.azure.rst new file mode 100644 index 00000000..e8966380 --- /dev/null +++ b/docs/source/api/sebs.azure.rst @@ -0,0 +1,98 @@ +sebs.azure package +================== + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.azure.azure module +----------------------- + +.. automodule:: sebs.azure.azure + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.blob\_storage module +------------------------------- + +.. automodule:: sebs.azure.blob_storage + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.cli module +--------------------- + +.. automodule:: sebs.azure.cli + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.cloud\_resources module +---------------------------------- + +.. automodule:: sebs.azure.cloud_resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.config module +------------------------ + +.. automodule:: sebs.azure.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.cosmosdb module +-------------------------- + +.. automodule:: sebs.azure.cosmosdb + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.function module +-------------------------- + +.. automodule:: sebs.azure.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.system\_resources module +----------------------------------- + +.. automodule:: sebs.azure.system_resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.azure.triggers module +-------------------------- + +.. automodule:: sebs.azure.triggers + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.azure + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.experiments.rst b/docs/source/api/sebs.experiments.rst new file mode 100644 index 00000000..1b68b2cd --- /dev/null +++ b/docs/source/api/sebs.experiments.rst @@ -0,0 +1,98 @@ +sebs.experiments package +======================== + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.experiments.config module +------------------------------ + +.. automodule:: sebs.experiments.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.environment module +----------------------------------- + +.. automodule:: sebs.experiments.environment + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.eviction\_model module +--------------------------------------- + +.. automodule:: sebs.experiments.eviction_model + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.experiment module +---------------------------------- + +.. automodule:: sebs.experiments.experiment + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.invocation\_overhead module +-------------------------------------------- + +.. automodule:: sebs.experiments.invocation_overhead + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.network\_ping\_pong module +------------------------------------------- + +.. 
automodule:: sebs.experiments.network_ping_pong + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.perf\_cost module +---------------------------------- + +.. automodule:: sebs.experiments.perf_cost + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.result module +------------------------------ + +.. automodule:: sebs.experiments.result + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.experiments.startup\_time module +------------------------------------- + +.. automodule:: sebs.experiments.startup_time + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.experiments + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.faas.rst b/docs/source/api/sebs.faas.rst new file mode 100644 index 00000000..4bfa5c9f --- /dev/null +++ b/docs/source/api/sebs.faas.rst @@ -0,0 +1,80 @@ +sebs.faas package +================= + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.faas.config module +----------------------- + +.. automodule:: sebs.faas.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.container module +-------------------------- + +.. automodule:: sebs.faas.container + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.function module +------------------------- + +.. automodule:: sebs.faas.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.nosql module +---------------------- + +.. automodule:: sebs.faas.nosql + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.resources module +-------------------------- + +.. automodule:: sebs.faas.resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.storage module +------------------------ + +.. automodule:: sebs.faas.storage + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.faas.system module +----------------------- + +.. automodule:: sebs.faas.system + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.faas + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.gcp.rst b/docs/source/api/sebs.gcp.rst new file mode 100644 index 00000000..fb389006 --- /dev/null +++ b/docs/source/api/sebs.gcp.rst @@ -0,0 +1,89 @@ +sebs.gcp package +================ + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.gcp.cli module +------------------- + +.. automodule:: sebs.gcp.cli + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.config module +---------------------- + +.. automodule:: sebs.gcp.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.datastore module +------------------------- + +.. automodule:: sebs.gcp.datastore + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.function module +------------------------ + +.. automodule:: sebs.gcp.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.gcp module +------------------- + +.. automodule:: sebs.gcp.gcp + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.resources module +------------------------- + +.. 
automodule:: sebs.gcp.resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.storage module +----------------------- + +.. automodule:: sebs.gcp.storage + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.gcp.triggers module +------------------------ + +.. automodule:: sebs.gcp.triggers + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.gcp + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.local.rst b/docs/source/api/sebs.local.rst new file mode 100644 index 00000000..38da3a92 --- /dev/null +++ b/docs/source/api/sebs.local.rst @@ -0,0 +1,62 @@ +sebs.local package +================== + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.local.config module +------------------------ + +.. automodule:: sebs.local.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.local.deployment module +---------------------------- + +.. automodule:: sebs.local.deployment + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.local.function module +-------------------------- + +.. automodule:: sebs.local.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.local.local module +----------------------- + +.. automodule:: sebs.local.local + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.local.measureMem module +---------------------------- + +.. automodule:: sebs.local.measureMem + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.local + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.openwhisk.rst b/docs/source/api/sebs.openwhisk.rst new file mode 100644 index 00000000..eaab1422 --- /dev/null +++ b/docs/source/api/sebs.openwhisk.rst @@ -0,0 +1,62 @@ +sebs.openwhisk package +====================== + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.openwhisk.config module +---------------------------- + +.. automodule:: sebs.openwhisk.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.openwhisk.container module +------------------------------- + +.. automodule:: sebs.openwhisk.container + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.openwhisk.function module +------------------------------ + +.. automodule:: sebs.openwhisk.function + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.openwhisk.openwhisk module +------------------------------- + +.. automodule:: sebs.openwhisk.openwhisk + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.openwhisk.triggers module +------------------------------ + +.. automodule:: sebs.openwhisk.triggers + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.openwhisk + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.rst b/docs/source/api/sebs.rst new file mode 100644 index 00000000..84afa8ab --- /dev/null +++ b/docs/source/api/sebs.rst @@ -0,0 +1,100 @@ +sebs package +============ + +.. contents:: Table of Contents + :local: + :depth: 2 + +Subpackages +----------- + +.. 
toctree:: + :maxdepth: 4 + + sebs.aws + sebs.azure + sebs.experiments + sebs.faas + sebs.gcp + sebs.local + sebs.openwhisk + sebs.storage + +Submodules +---------- + +sebs.benchmark module +--------------------- + +.. automodule:: sebs.benchmark + :members: + :undoc-members: + :show-inheritance: + :no-index: + +sebs.cache module +----------------- + +.. automodule:: sebs.cache + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.config module +------------------ + +.. automodule:: sebs.config + :members: + :show-inheritance: + :undoc-members: + +sebs.sebs module +---------------- + +.. automodule:: sebs.sebs + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.statistics module +---------------------- + +.. automodule:: sebs.statistics + :members: + :show-inheritance: + :undoc-members: + +sebs.types module +----------------- + +.. automodule:: sebs.types + :members: + :show-inheritance: + :undoc-members: + +sebs.utils module +----------------- + +.. automodule:: sebs.utils + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.version module +------------------- + +.. automodule:: sebs.version + :members: + :show-inheritance: + :undoc-members: + +Module contents +--------------- + +.. automodule:: sebs + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/api/sebs.storage.rst b/docs/source/api/sebs.storage.rst new file mode 100644 index 00000000..a57e381a --- /dev/null +++ b/docs/source/api/sebs.storage.rst @@ -0,0 +1,53 @@ +sebs.storage package +==================== + +.. contents:: Table of Contents + :local: + :depth: 2 + +Submodules +---------- + +sebs.storage.config module +-------------------------- + +.. automodule:: sebs.storage.config + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.storage.minio module +------------------------- + +.. automodule:: sebs.storage.minio + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.storage.resources module +----------------------------- + +.. automodule:: sebs.storage.resources + :members: + :show-inheritance: + :undoc-members: + :no-index: + +sebs.storage.scylladb module +---------------------------- + +.. automodule:: sebs.storage.scylladb + :members: + :show-inheritance: + :undoc-members: + :no-index: + +Module contents +--------------- + +.. automodule:: sebs.storage + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..3cafa3c2 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,38 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) +# sys.path.insert(0, os.path.abspath('../../sebs')) + +project = "sebs" +copyright = "2024, Marcin Copik" +author = "Marcin Copik" +release = "1.2" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.viewcode"] + +templates_path = ["_templates"] +exclude_patterns = [] + +# -- Autodoc configuration -------------------------------------------------- +# Let RST files control documentation generation explicitly to avoid duplicates + +# Suppress duplicate object warnings +suppress_warnings = ['autosectionlabel.*'] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..671a0ebc --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,18 @@ +.. SeBS documentation master file, created by + sphinx-quickstart on Sat Dec 14 03:35:29 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +SeBS documentation +================== + +Add your content using ``reStructuredText`` syntax. See the +`reStructuredText `_ +documentation for details. + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + api/modules diff --git a/requirements.docs.txt b/requirements.docs.txt new file mode 100644 index 00000000..c17ae307 --- /dev/null +++ b/requirements.docs.txt @@ -0,0 +1,2 @@ +Sphinx==8.2.3 +sphinx-rtd-theme==3.0.2 diff --git a/sebs/aws/__init__.py b/sebs/aws/__init__.py index 44df1200..3f1bfe4b 100644 --- a/sebs/aws/__init__.py +++ b/sebs/aws/__init__.py @@ -1,3 +1,27 @@ +"""AWS module for the Serverless Benchmarking Suite (SeBS). + +This module provides the AWS implementation of the SeBS framework, enabling +deployment and management of serverless functions on AWS Lambda. It includes +comprehensive support for AWS services including Lambda, S3, DynamoDB, ECR, +and API Gateway. + +Key components: + AWS: Main AWS system implementation + LambdaFunction: AWS Lambda function representation + AWSConfig: AWS-specific configuration management + S3: Object storage implementation for S3 + DynamoDB: Key-value store implementation for DynamoDB + +The module handles AWS-specific functionality including: +- Lambda function deployment and management +- Container deployments via ECR +- S3 storage for code packages and data +- DynamoDB NoSQL storage +- API Gateway HTTP triggers +- IAM role management +- CloudWatch metrics collection +""" + from .aws import AWS, LambdaFunction # noqa from .config import AWSConfig # noqa from .s3 import S3 # noqa diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f..12aeaff8 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -1,3 +1,12 @@ +""" +AWS Lambda implementation for the SeBs framework. + +This module provides the AWS implementation of the FaaS System interface. 
+It handles deploying and managing serverless functions on AWS Lambda, +including code packaging, function creation, trigger management, and +metrics collection. +""" + import math import os import shutil @@ -25,35 +34,71 @@ class AWS(System): + """ + AWS Lambda implementation of the System interface. + + This class implements the FaaS System interface for AWS Lambda, + providing methods for deploying, invoking, and managing Lambda functions. + + Attributes: + logs_client: AWS CloudWatch Logs client + cached: Whether AWS resources have been cached + _config: AWS-specific configuration + """ + logs_client = None cached = False _config: AWSConfig @staticmethod - def name(): + def name() -> str: + """ + Get the name of this system. + + Returns: + str: System name ('aws') + """ return "aws" @staticmethod - def typename(): + def typename() -> str: + """ + Get the type name of this system. + + Returns: + str: Type name ('AWS') + """ return "AWS" @staticmethod def function_type() -> "Type[Function]": + """ + Get the function type for this system. + + Returns: + Type[Function]: LambdaFunction class + """ return LambdaFunction @property def config(self) -> AWSConfig: + """ + Get the AWS-specific configuration. + + Returns: + AWSConfig: AWS configuration + """ return self._config @property def system_resources(self) -> AWSSystemResources: - return cast(AWSSystemResources, self._system_resources) + """ + Get the AWS system resources manager. - """ - :param cache_client: Function cache instance - :param config: Experiments config - :param docker_client: Docker instance - """ + Returns: + AWSSystemResources: AWS resource manager + """ + return cast(AWSSystemResources, self._system_resources) def __init__( self, @@ -63,6 +108,16 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize the AWS system. + + Args: + sebs_config: SeBs system configuration + config: AWS-specific configuration + cache_client: Cache client for caching resources + docker_client: Docker client for building images + logger_handlers: Logging configuration + """ super().__init__( sebs_config, cache_client, @@ -75,6 +130,16 @@ def __init__( self.nosql_storage: Optional[DynamoDB] = None def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize AWS resources. + + Creates a boto3 session, initializes Lambda client, and prepares + system resources and ECR client. + + Args: + config: Additional configuration parameters + resource_prefix: Optional prefix for resource names + """ # thread-safe self.session = boto3.session.Session( aws_access_key_id=self.config.credentials.access_key, @@ -89,6 +154,12 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] ) def get_lambda_client(self): + """ + Get or create an AWS Lambda client. + + Returns: + boto3.client: Lambda client + """ if not hasattr(self, "client"): self.client = self.session.client( service_name="lambda", @@ -96,24 +167,6 @@ def get_lambda_client(self): ) return self.client - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module thus it's easier - to always create a module. 
- - Structure: - function - - function.py - - storage.py - - resources - handler.py - - benchmark: benchmark name - """ - def package_code( self, directory: str, @@ -124,6 +177,42 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Package code for deployment to AWS Lambda. + + Creates a suitable deployment package with the following structure:: + + function/ + - function.py + - storage.py + - resources/ + handler.py + + It would be sufficient to just pack the code and ship it as zip to AWS. + However, to have a compatible function implementation across providers, + we create a small module. + Issue: relative imports in Python when using storage wrapper. + Azure expects a relative import inside a module thus it's easier + to always create a module. + + For container deployments, builds a Docker image and pushes it to ECR. + For ZIP deployments, creates a ZIP package compatible with Lambda. + + Args: + directory: Path to the code directory + language_name: Programming language name (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target CPU architecture (e.g., 'x64', 'arm64') + benchmark: Benchmark name + is_cached: Whether code is already cached + container_deployment: Whether to use container deployment + + Returns: + Tuple containing: + - Path to the packaged code (ZIP file) + - Size of the package in bytes + - Container URI (if container_deployment=True, otherwise empty string) + """ container_uri = "" @@ -131,7 +220,12 @@ def package_code( if container_deployment: # build base image and upload to ECR _, container_uri = self.ecr_client.build_base_image( - directory, language_name, language_version, architecture, benchmark, is_cached + directory, + language_name, + language_version, + architecture, + benchmark, + is_cached, ) CONFIG_FILES = { @@ -163,13 +257,33 @@ def package_code( ) def _map_architecture(self, architecture: str) -> str: + """ + Map architecture name to AWS Lambda-compatible format. + Args: + architecture: Architecture name from SeBs (e.g., 'x64') + + Returns: + str: AWS Lambda-compatible architecture name (e.g., 'x86_64') + """ if architecture == "x64": return "x86_64" return architecture - def _map_language_runtime(self, language: str, runtime: str): + def _map_language_runtime(self, language: str, runtime: str) -> str: + """ + Map language runtime to AWS Lambda-compatible format. + + AWS uses different naming schemes for runtime versions. + For example, Node.js uses '12.x' instead of '12'. + + Args: + language: Language name (e.g., 'nodejs', 'python') + runtime: Runtime version (e.g., '12', '3.8') + Returns: + str: AWS Lambda-compatible runtime version + """ # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. if language == "nodejs": @@ -183,6 +297,21 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LambdaFunction": + """ + Create or update an AWS Lambda function. + + If the function already exists, it updates the code and configuration. + Otherwise, it creates a new function with the specified parameters. 
+ + Args: + code_package: Benchmark code package + func_name: Name of the function + container_deployment: Whether to use container deployment + container_uri: URI of the container image (if container_deployment=True) + + Returns: + LambdaFunction: The created or updated Lambda function + """ package = code_package.code_location benchmark = code_package.benchmark @@ -286,7 +415,15 @@ def create_function( return lambda_function - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Set up triggers for a cached function. + + Configures triggers for a function that was loaded from cache, + ensuring they have proper logging handlers and deployment client references. + + Args: + function: Function instance to configure triggers for + """ from sebs.aws.triggers import LibraryTrigger @@ -296,17 +433,6 @@ def cached_function(self, function: Function): for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers - """ - Update function code and configuration on AWS. - - :param benchmark: benchmark name - :param name: function name - :param code_package: path to code package - :param code_size: size of code package in bytes - :param timeout: function timeout in seconds - :param memory: memory limit for function - """ - def update_function( self, function: Function, @@ -314,6 +440,19 @@ def update_function( container_deployment: bool, container_uri: str, ): + """ + Update an existing AWS Lambda function. + + Updates the function code and waits for the update to complete. + For container deployments, updates the container image. + For ZIP deployments, uploads the code package directly or via S3. + + Args: + function: The function to update + code_package: Benchmark code package + container_deployment: Whether to use container deployment + container_uri: URI of the container image (if container_deployment=True) + """ name = function.name function = cast(LambdaFunction, function) @@ -359,7 +498,21 @@ def update_function( def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} - ): + ) -> None: + """Update Lambda function configuration. + + Updates the function's timeout, memory, and environment variables. + Automatically adds environment variables for NoSQL storage table names + if the benchmark uses NoSQL storage. + + Args: + function: Function to update + code_package: Benchmark code package with configuration + env_variables: Additional environment variables to set + + Raises: + AssertionError: If code package input has not been processed + """ # We can only update storage configuration once it has been processed for this benchmark assert code_package.has_input_processed @@ -405,6 +558,19 @@ def update_function_configuration( def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate default function name for a benchmark. + + Creates a standardized function name based on resource ID, benchmark name, + language, version, and architecture. Ensures the name is compatible with + AWS Lambda naming requirements. 
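+ + For instance (hypothetical values), resource ID 'abc123', benchmark '110.dynamic-html', language 'python', version '3.8', and architecture 'x64' would yield a name like 'sebs_abc123_110_dynamic_html_python_3_8_x64' after formatting.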
+ + Args: + code_package: Benchmark code package + resources: Optional resources object (uses default if not provided) + + Returns: + str: Formatted function name suitable for AWS Lambda + """ # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id func_name = "sebs-{}-{}-{}-{}-{}".format( @@ -420,47 +586,59 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name for AWS Lambda compatibility. + + AWS Lambda has specific naming requirements. This method ensures + the function name complies with AWS Lambda naming rules. + + Args: + func_name: Raw function name + + Returns: + str: Formatted function name with illegal characters replaced + """ # AWS Lambda does not allow hyphens in function names func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - FIXME: does not clean the cache - """ + def delete_function(self, func_name: Optional[str]) -> None: + """Delete an AWS Lambda function. + + Args: + func_name: Name of the function to delete - def delete_function(self, func_name: Optional[str]): + Note: + FIXME: does not clean the cache in SeBS. + """ self.logging.debug("Deleting function {}".format(func_name)) try: self.client.delete_function(FunctionName=func_name) except Exception: self.logging.debug("Function {} does not exist!".format(func_name)) - """ - Prepare AWS resources to store experiment results. - Allocate one bucket. - - :param benchmark: benchmark name - :return: name of bucket to store experiment results - """ - - # def prepare_experiment(self, benchmark: str): - # logs_bucket = self.get_storage().add_output_bucket(benchmark, suffix="logs") - # return logs_bucket - - """ - Accepts AWS report after function invocation. - Returns a dictionary filled with values with various metrics such as - time, invocation time and memory consumed. - - :param log: decoded log from CloudWatch or from synchronuous invocation - :return: dictionary with parsed values - """ - @staticmethod def parse_aws_report( log: str, requests: Union[ExecutionResult, Dict[str, ExecutionResult]] ) -> str: + """Parse AWS Lambda execution report from CloudWatch logs. + + Extracts execution metrics from AWS Lambda log entries and updates + the corresponding ExecutionResult objects with timing, memory, + billing information, and init duration (when provided). + + Args: + log: Raw log string from CloudWatch or synchronous invocation + requests: Either a single ExecutionResult or dictionary mapping + request IDs to ExecutionResult objects + + Returns: + str: Request ID of the parsed execution + + Example: + The log format expected is tab-separated AWS Lambda report format: + "REPORT RequestId: abc123\tDuration: 100.00 ms\tBilled Duration: 100 ms\t..." + """ aws_vals = {} for line in log.split("\t"): if not line.isspace(): @@ -487,9 +665,26 @@ def parse_aws_report( return request_id def shutdown(self) -> None: + """Shutdown the AWS system and clean up resources. + + Calls the parent shutdown method to perform standard cleanup. + """ super().shutdown() - def get_invocation_error(self, function_name: str, start_time: int, end_time: int): + def get_invocation_error(self, function_name: str, start_time: int, end_time: int) -> None: + """Retrieve and log invocation errors from CloudWatch Logs. + + Queries CloudWatch Logs for error messages during the specified time range + and logs them for debugging purposes. 
+ + Args: + function_name: Name of the Lambda function + start_time: Start time for log query (Unix timestamp) + end_time: End time for log query (Unix timestamp) + + Note: + It is unclear at the moment if this function is always working correctly. + """ if not self.logs_client: self.logs_client = boto3.client( service_name="logs", @@ -532,7 +727,19 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """Download execution metrics from CloudWatch Logs. + + Queries CloudWatch Logs for Lambda execution reports and parses them + to extract performance metrics for each request. + + Args: + function_name: Name of the Lambda function + start_time: Start time for metrics collection (Unix timestamp) + end_time: End time for metrics collection (Unix timestamp) + requests: Dictionary mapping request IDs to ExecutionResult objects + metrics: Dictionary to store collected metrics + """ if not self.logs_client: self.logs_client = boto3.client( @@ -576,6 +783,21 @@ def download_metrics( ) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for the specified function. + + Creates and configures a trigger based on the specified type. Currently + supports HTTP triggers (via API Gateway) and library triggers. + + Args: + func: Function to create trigger for + trigger_type: Type of trigger to create (HTTP or LIBRARY) + + Returns: + Trigger: The created trigger instance + + Raises: + RuntimeError: If trigger type is not supported + """ from sebs.aws.triggers import HTTPTrigger function = cast(LambdaFunction, func) @@ -610,13 +832,31 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T self.cache_client.update_function(function) return trigger - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: + """Enforce cold start for a single function. + + Updates the function's environment variables to force a cold start + on the next invocation. + + Args: + function: Function to enforce cold start for + code_package: Benchmark code package with configuration + """ func = cast(LambdaFunction, function) self.update_function_configuration( func, code_package, {"ForceColdStart": str(self.cold_start_counter)} ) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for multiple functions. + + Updates all specified functions to force cold starts on their next invocations. + This is useful for ensuring consistent performance measurements. + + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark code package with configuration + """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -626,19 +866,40 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.wait_function_updated(lambda_function) self.logging.info("Finished function updates enforcing cold starts.") - def wait_function_active(self, func: LambdaFunction): + def wait_function_active(self, func: LambdaFunction) -> None: + """Wait for Lambda function to become active after creation. + + Uses AWS Lambda waiter to wait until the function is in Active state + and ready to be invoked. 
+ + Args: + func: Lambda function to wait for + """ self.logging.info("Waiting for Lambda function to be created...") waiter = self.client.get_waiter("function_active_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been created.") - def wait_function_updated(self, func: LambdaFunction): + def wait_function_updated(self, func: LambdaFunction) -> None: + """Wait for Lambda function to complete update process. + + Uses AWS Lambda waiter to wait until the function update is complete + and the function is ready to be invoked with new configuration. + + Args: + func: Lambda function to wait for + """ self.logging.info("Waiting for Lambda function to be updated...") waiter = self.client.get_waiter("function_updated_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been updated.") - def disable_rich_output(self): + def disable_rich_output(self) -> None: + """Disable rich output formatting for ECR operations. + + Disables colored/formatted output in the ECR container client, + useful for CI/CD environments or when plain text output is preferred. + """ self.ecr_client.disable_rich_output = True diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 2d05e842..ee28bdab 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -1,3 +1,16 @@ +"""Configuration management for AWS SeBS integration. + +This module provides configuration classes for AWS credentials, resources, and settings +used when deploying to AWS Lambda. It handles +AWS authentication, resource management including ECR repositories, IAM roles, and +HTTP APIs, along with caching and serialization capabilities. + +Key classes: + AWSCredentials: Manages AWS access credentials and account information + AWSResources: Manages AWS resources like ECR repositories, IAM roles, and HTTP APIs + AWSConfig: Main configuration container combining credentials and resources +""" + import base64 import json import os @@ -14,42 +27,111 @@ class AWSCredentials(Credentials): - def __init__(self, access_key: str, secret_key: str): + """AWS authentication credentials for SeBS. + + This class manages AWS access credentials including access key, secret key, + and automatically retrieves the associated AWS account ID through STS. + + Account ID is cached to retain information on which account was the benchmark + executed. Credentials are not cached. + + Attributes: + _access_key: AWS access key ID + _secret_key: AWS secret access key + _account_id: AWS account ID retrieved via STS + """ + + def __init__(self, access_key: str, secret_key: str) -> None: + """Initialize AWS credentials. + + Args: + access_key: AWS access key ID + secret_key: AWS secret access key + + Raises: + ClientError: If AWS credentials are invalid or STS call fails + """ super().__init__() self._access_key = access_key self._secret_key = secret_key client = boto3.client( - "sts", aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key + "sts", + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, ) self._account_id = client.get_caller_identity()["Account"] @staticmethod def typename() -> str: + """Get the type name for these credentials. + + Returns: + str: The type name 'AWS.Credentials' + """ return "AWS.Credentials" @property def access_key(self) -> str: + """Get the AWS access key ID. + + Returns: + str: AWS access key ID + """ return self._access_key @property def secret_key(self) -> str: + """Get the AWS secret access key. 
+ + Returns: + str: AWS secret access key + """ return self._secret_key @property def account_id(self) -> str: + """Get the AWS account ID. + + Returns: + str: AWS account ID + """ return self._account_id @staticmethod def initialize(dct: dict) -> "AWSCredentials": + """Initialize AWS credentials from a dictionary. + + Args: + dct: Dictionary containing 'access_key' and 'secret_key' + + Returns: + AWSCredentials: Initialized credentials object + + Raises: + KeyError: If required keys are missing from dictionary + """ return AWSCredentials(dct["access_key"], dct["secret_key"]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Deserialize AWS credentials from configuration and cache. + + Loads AWS credentials from configuration file, environment variables, or cache. + Validates that credentials match cached account ID if available. + + Args: + config: Configuration dictionary that may contain credentials + cache: Cache instance for retrieving/storing credentials + handlers: Logging handlers for error reporting - # FIXME: update return types of both functions to avoid cast - # needs 3.7+ to support annotations + Returns: + Credentials: Deserialized AWSCredentials instance + + Raises: + RuntimeError: If credentials are missing or don't match cached account + """ cached_config = cache.get_config("aws") ret: AWSCredentials account_id: Optional[str] = None @@ -85,33 +167,96 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden ret.logging_handlers = handlers return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current credentials. + + Args: + cache: Cache instance to update + """ cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) def serialize(self) -> dict: + """Serialize credentials to a dictionary. + + Returns: + dict: Dictionary containing account_id + """ out = {"account_id": self._account_id} return out class AWSResources(Resources): + """AWS resource management for SeBS. + + This class manages AWS-specific resources including ECR repositories, + IAM roles, HTTP APIs, and Docker registry configurations. It provides + methods for creating and managing these resources with caching support. + + Attributes: + _docker_registry: Docker registry URL (ECR repository URI) + _docker_username: Docker registry username + _docker_password: Docker registry password + _container_repository: ECR repository name + _lambda_role: IAM role ARN for Lambda execution + _http_apis: Dictionary of HTTP API configurations + """ + class HTTPApi: - def __init__(self, arn: str, endpoint: str): + """HTTP API configuration for AWS API Gateway. + + Represents an HTTP API resource in AWS API Gateway with its ARN and endpoint. + + Attributes: + _arn: API Gateway ARN + _endpoint: API Gateway endpoint URL + """ + + def __init__(self, arn: str, endpoint: str) -> None: + """Initialize HTTP API configuration. + + Args: + arn: API Gateway ARN + endpoint: API Gateway endpoint URL + """ self._arn = arn self._endpoint = endpoint @property def arn(self) -> str: + """Get the API Gateway ARN. + + Returns: + str: API Gateway ARN + """ return self._arn @property def endpoint(self) -> str: + """Get the API Gateway endpoint URL. + + Returns: + str: API Gateway endpoint URL + """ return self._endpoint @staticmethod def deserialize(dct: dict) -> "AWSResources.HTTPApi": + """Deserialize HTTP API from dictionary. 
+ + Args: + dct: Dictionary containing 'arn' and 'endpoint' + + Returns: + AWSResources.HTTPApi: Deserialized HTTP API instance + """ return AWSResources.HTTPApi(dct["arn"], dct["endpoint"]) def serialize(self) -> dict: + """Serialize HTTP API to dictionary. + + Returns: + dict: Dictionary containing arn and endpoint + """ out = {"arn": self.arn, "endpoint": self.endpoint} return out @@ -120,7 +265,14 @@ def __init__( registry: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, - ): + ) -> None: + """Initialize AWS resources. + + Args: + registry: Docker registry URL (ECR repository URI) + username: Docker registry username + password: Docker registry password + """ super().__init__(name="aws") self._docker_registry: Optional[str] = registry if registry != "" else None self._docker_username: Optional[str] = username if username != "" else None @@ -131,25 +283,65 @@ def __init__( @staticmethod def typename() -> str: + """Get the type name for these resources. + + Returns: + str: The type name 'AWS.Resources' + """ return "AWS.Resources" @property def docker_registry(self) -> Optional[str]: + """Get the Docker registry URL. + + Returns: + Optional[str]: Docker registry URL (ECR repository URI) + """ return self._docker_registry @property def docker_username(self) -> Optional[str]: + """Get the Docker registry username. + + Returns: + Optional[str]: Docker registry username + """ return self._docker_username @property def docker_password(self) -> Optional[str]: + """Get the Docker registry password. + + Returns: + Optional[str]: Docker registry password + """ return self._docker_password @property def container_repository(self) -> Optional[str]: + """Get the ECR repository name. + + Returns: + Optional[str]: ECR repository name + """ return self._container_repository def lambda_role(self, boto3_session: boto3.session.Session) -> str: + """Get or create IAM role for Lambda execution. + + Creates a Lambda execution role with S3 and basic execution permissions + if it doesn't already exist. The role allows Lambda functions to access + S3 and write CloudWatch logs. + + Args: + boto3_session: Boto3 session for AWS API calls + + Returns: + str: Lambda execution role ARN + + Raises: + ClientError: If IAM operations fail + """ if not self._lambda_role: iam_client = boto3_session.client(service_name="iam") trust_policy = { @@ -190,6 +382,23 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: def http_api( self, api_name: str, func: LambdaFunction, boto3_session: boto3.session.Session ) -> "AWSResources.HTTPApi": + """Get or create HTTP API for Lambda function. + + Creates an HTTP API Gateway that routes requests to the specified Lambda function. + If the API already exists, returns the cached instance. + + Args: + api_name: Name of the HTTP API + func: Lambda function to route requests to + boto3_session: Boto3 session for AWS API calls + + Returns: + AWSResources.HTTPApi: HTTP API configuration + + Raises: + RuntimeError: If API creation fails after retries + TooManyRequestsException: If API Gateway rate limits are exceeded + """ http_api = self._http_apis.get(api_name) if not http_api: @@ -245,6 +454,18 @@ def http_api( def check_ecr_repository_exists( self, ecr_client: ECRClient, repository_name: str ) -> Optional[str]: + """Check if ECR repository exists. 
+ + Args: + ecr_client: ECR client instance + repository_name: Name of the ECR repository + + Returns: + Optional[str]: Repository URI if exists, None otherwise + + Raises: + Exception: If ECR operation fails (other than RepositoryNotFound) + """ try: resp = ecr_client.describe_repositories(repositoryNames=[repository_name]) return resp["repositories"][0]["repositoryUri"] @@ -255,6 +476,20 @@ raise e def get_ecr_repository(self, ecr_client: ECRClient) -> str: + """Get or create ECR repository for container deployments. + + Creates an ECR repository with a unique name based on the resource ID + if it doesn't already exist. Updates the docker_registry property. + + Args: + ecr_client: ECR client instance + + Returns: + str: ECR repository name + + Raises: + ClientError: If ECR operations fail + """ if self._container_repository is not None: return self._container_repository @@ -281,6 +516,21 @@ return self._container_repository def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, str]: + """Get ECR repository authorization credentials. + + Retrieves temporary authorization token from ECR and extracts + username and password for Docker registry authentication. + + Args: + ecr_client: ECR client instance + + Returns: + Tuple[str, str, str]: Username, password, and registry URL + + Raises: + AssertionError: If username or registry is None + ClientError: If ECR authorization fails + """ if self._docker_password is None: response = ecr_client.get_authorization_token() @@ -295,7 +545,16 @@ return self._docker_username, self._docker_password, self._docker_registry @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: dict) -> None: + """Initialize AWS resources from dictionary. + + The passed-in resources object is modified in place; + nothing is returned. + + Args: + res: Base Resources instance to initialize + dct: Dictionary containing resource configuration + """ ret = cast(AWSResources, res) super(AWSResources, AWSResources).initialize(ret, dct) @@ -310,9 +569,12 @@ for key, value in dct["http-apis"].items(): ret._http_apis[key] = AWSResources.HTTPApi.deserialize(value) - return ret - def serialize(self) -> dict: + """Serialize AWS resources to dictionary. + + Returns: + dict: Serialized resource configuration + """ out = { **super().serialize(), "lambda-role": self._lambda_role, @@ -325,7 +587,12 @@ } return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current resource configuration.
+ + Args: + cache: Cache instance to update + """ super().update_cache(cache) cache.update_config( val=self.docker_registry, keys=["aws", "resources", "docker", "registry"] @@ -334,7 +601,8 @@ def update_cache(self, cache: Cache): val=self.docker_username, keys=["aws", "resources", "docker", "username"] ) cache.update_config( - val=self.container_repository, keys=["aws", "resources", "container_repository"] + val=self.container_repository, + keys=["aws", "resources", "container_repository"], ) cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): @@ -342,6 +610,16 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Deserialize AWS resources from configuration and cache. + + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached resources + handlers: Logging handlers for status messages + + Returns: + Resources: Deserialized AWSResources instance + """ ret = AWSResources() cached_config = cache.get_config("aws") @@ -365,34 +643,81 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AWSConfig(Config): - def __init__(self, credentials: AWSCredentials, resources: AWSResources): + """Main AWS configuration container. + + Combines AWS credentials and resources into a single configuration object + for use by the AWS SeBS implementation. + + Attributes: + _credentials: AWS authentication credentials + _resources: AWS resource management configuration + """ + + def __init__(self, credentials: AWSCredentials, resources: AWSResources) -> None: + """Initialize AWS configuration. + + Args: + credentials: AWS authentication credentials + resources: AWS resource management configuration + """ super().__init__(name="aws") self._credentials = credentials self._resources = resources @staticmethod def typename() -> str: + """Get the type name for this configuration. + + Returns: + str: The type name 'AWS.Config' + """ return "AWS.Config" @property def credentials(self) -> AWSCredentials: + """Get AWS credentials. + + Returns: + AWSCredentials: AWS authentication credentials + """ return self._credentials @property def resources(self) -> AWSResources: + """Get AWS resources configuration. + + Returns: + AWSResources: AWS resource management configuration + """ return self._resources - # FIXME: use future annotations (see sebs/faas/system) @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize AWS configuration from dictionary. + + Args: + cfg: Base Config instance to initialize + dct: Dictionary containing 'region' configuration + """ config = cast(AWSConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """Deserialize AWS configuration from config and cache. + + Creates an AWSConfig instance by deserializing credentials and resources, + then loading region configuration from cache or user-provided config. 
+ + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached configuration + handlers: Logging handlers for status messages + Returns: + Config: Deserialized AWSConfig instance + """ cached_config = cache.get_config("aws") - # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) config_obj = AWSConfig(credentials, resources) @@ -408,19 +733,25 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.region = config_obj.region return config_obj - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache: Cache) -> None: + """Update the contents of the user cache. - Update values: region. - """ + The changes are directly written to the file system. + Updates region, credentials, and resources in the cache. - def update_cache(self, cache: Cache): + Args: + cache: Cache instance to update + """ cache.update_config(val=self.region, keys=["aws", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """Serialize AWS configuration to dictionary. + + Returns: + dict: Serialized configuration including name, region, credentials, and resources + """ out = { "name": "aws", "region": self._region, diff --git a/sebs/aws/container.py b/sebs/aws/container.py index e7c2cbe6..3cfa5d5b 100644 --- a/sebs/aws/container.py +++ b/sebs/aws/container.py @@ -1,3 +1,14 @@ +"""AWS ECR container management for SeBS. + +This module provides the ECRContainer class which handles Docker container +operations for AWS Lambda deployments using Amazon Elastic Container Registry (ECR). +It extends the base DockerContainer class with AWS-specific functionality for +image registry operations. + +Key classes: + ECRContainer: AWS ECR-specific container management +""" + import docker from typing import Tuple @@ -11,12 +22,33 @@ class ECRContainer(DockerContainer): + """AWS ECR container management for SeBS. + + This class handles Docker container operations specifically for AWS Lambda + deployments using Amazon Elastic Container Registry (ECR). It provides + functionality for building, tagging, and pushing container images to ECR. + + Attributes: + ecr_client: AWS ECR client for registry operations + config: AWS-specific configuration + """ + @staticmethod - def name(): + def name() -> str: + """Get the name of this container system. + + Returns: + str: System name ('aws') + """ return "aws" @staticmethod def typename() -> str: + """Get the type name of this container system. + + Returns: + str: Type name ('AWS.ECRContainer') + """ return "AWS.ECRContainer" def __init__( @@ -25,20 +57,45 @@ def __init__( session: boto3.session.Session, config: AWSConfig, docker_client: docker.client.DockerClient, - ): - + ) -> None: + """Initialize ECR container manager. + + Args: + system_config: SeBS system configuration + session: AWS boto3 session + config: AWS-specific configuration + docker_client: Docker client for local operations + """ super().__init__(system_config, docker_client) self.ecr_client = session.client(service_name="ecr", region_name=config.region) self.config = config @property def client(self) -> ECRClient: + """Get the ECR client. 
+ + Returns: + ECRClient: AWS ECR client for registry operations + """ return self.ecr_client def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """Generate ECR registry details for a benchmark image. + + Creates the registry name, repository name, image tag, and full image URI + for a specific benchmark configuration. + + Args: + benchmark: Name of the benchmark + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x64', 'arm64') + Returns: + Tuple[str, str, str, str]: Registry name, repository name, image tag, and image URI + """ account_id = self.config.credentials.account_id region = self.config.region registry_name = f"{account_id}.dkr.ecr.{region}.amazonaws.com" @@ -51,7 +108,16 @@ def registry_name( return registry_name, repository_name, image_tag, image_uri - def find_image(self, repository_name, image_tag) -> bool: + def find_image(self, repository_name: str, image_tag: str) -> bool: + """Check if an image exists in the ECR repository. + + Args: + repository_name: Name of the ECR repository + image_tag: Tag of the image to search for + + Returns: + bool: True if the image exists, False otherwise + """ try: response = self.ecr_client.describe_images( repositoryName=repository_name, imageIds=[{"imageTag": image_tag}] @@ -63,8 +129,19 @@ def find_image(self, repository_name, image_tag) -> bool: return False - def push_image(self, repository_uri, image_tag): + def push_image(self, repository_uri: str, image_tag: str) -> None: + """Push a Docker image to ECR. + + Authenticates with ECR using temporary credentials and pushes the + specified image to the repository. + + Args: + repository_uri: URI of the ECR repository + image_tag: Tag of the image to push + Raises: + RuntimeError: If the push operation fails + """ username, password, registry_url = self.config.resources.ecr_repository_authorization( self.client ) diff --git a/sebs/aws/dynamodb.py b/sebs/aws/dynamodb.py index 0f3cc878..39fd6d94 100644 --- a/sebs/aws/dynamodb.py +++ b/sebs/aws/dynamodb.py @@ -1,3 +1,13 @@ +"""AWS DynamoDB NoSQL storage implementation for SeBS. + +This module provides the DynamoDB class which implements NoSQL storage functionality +for the Serverless Benchmarking Suite using Amazon DynamoDB. It handles table +creation, data operations, and caching for benchmark data storage. + +Key classes: + DynamoDB: AWS DynamoDB NoSQL storage implementation +""" + from collections import defaultdict from typing import Dict, Optional, Tuple @@ -10,12 +20,34 @@ class DynamoDB(NoSQLStorage): + """AWS DynamoDB NoSQL storage implementation for SeBS. + + This class provides NoSQL storage functionality using Amazon DynamoDB. + It handles table creation, data operations, caching, and provides a + unified interface for benchmark data storage. + + Attributes: + client: DynamoDB client for AWS API operations + _tables: Mapping of benchmark names to table configurations + _serializer: DynamoDB type serializer for data conversion + """ + @staticmethod def typename() -> str: + """Get the type name for this storage system. + + Returns: + str: Type name ('AWS.DynamoDB') + """ return "AWS.DynamoDB" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage system. 
+ + Returns: + str: Deployment name ('aws') + """ return "aws" def __init__( @@ -26,7 +58,17 @@ def __init__( region: str, access_key: str, secret_key: str, - ): + ) -> None: + """Initialize DynamoDB NoSQL storage. + + Args: + session: AWS boto3 session + cache_client: Cache client for storing table configurations + resources: Cloud resource configuration + region: AWS region name + access_key: AWS access key ID + secret_key: AWS secret access key + """ super().__init__(region, cache_client, resources) self.client = session.client( "dynamodb", @@ -42,7 +84,14 @@ def __init__( self._serializer = TypeSerializer() def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve table configuration from cache. + + Args: + benchmark: Name of the benchmark + Returns: + bool: True if cache was found and loaded, False otherwise + """ if benchmark in self._tables: return True @@ -53,8 +102,12 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update cache with current table configuration. + Args: + benchmark: Name of the benchmark to update cache for + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, @@ -64,10 +117,26 @@ def update_cache(self, benchmark: str): ) def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table mappings for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Mapping of logical table names to actual DynamoDB table names + """ return self._tables[benchmark] def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual DynamoDB table name for a logical table. + Args: + benchmark: Name of the benchmark + table: Logical table name used by the benchmark + + Returns: + Optional[str]: Actual DynamoDB table name, or None if not found + """ if benchmark not in self._tables: return None @@ -83,8 +152,19 @@ def write_to_table( data: dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): - + ) -> None: + """Write data to a DynamoDB table. + + Args: + benchmark: Name of the benchmark + table: Logical table name + data: Data to write to the table + primary_key: Primary key as (attribute_name, value) tuple + secondary_key: Optional secondary key as (attribute_name, value) tuple + + Raises: + AssertionError: If the table name is not found + """ table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -95,16 +175,30 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ - def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """Create a DynamoDB table for benchmark data. + + Creates a unique DynamoDB table name using resource ID, benchmark name, and provided name. + Unlike Azure (account -> database -> container) and GCP (database per benchmark), + AWS requires unique table names across the account. + + The function handles cases where the table already exists or is being created. + Uses PAY_PER_REQUEST billing mode. 
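To make the table-creation contract concrete, here is a minimal sketch of the underlying boto3 call, assuming hypothetical table and attribute names (the real method additionally handles pre-existing tables and an optional sort key):

    import boto3

    client = boto3.client("dynamodb", region_name="us-east-1")
    # Hypothetical name; real names embed the resource id, benchmark, and logical table.
    table_name = "sebs-benchmarks-resourceid-benchmark-table"

    client.create_table(
        TableName=table_name,
        AttributeDefinitions=[{"AttributeName": "request_id", "AttributeType": "S"}],
        KeySchema=[{"AttributeName": "request_id", "KeyType": "HASH"}],
        BillingMode="PAY_PER_REQUEST",  # no provisioned throughput to manage
    )
    client.get_waiter("table_exists").wait(TableName=table_name)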
+ + Args: + benchmark: Name of the benchmark + name: Logical table name + primary_key: Name of the primary key attribute + secondary_key: Optional name of the secondary key attribute + + Returns: + str: Name of the created table + + Raises: + RuntimeError: If table creation fails for unknown reasons + """ table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" @@ -169,7 +263,29 @@ def create_table( raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}") def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + str: Result of the operation + + Raises: + NotImplementedError: This operation is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + str: Result of the operation + + Raises: + NotImplementedError: This operation is not yet implemented + """ raise NotImplementedError() diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 27aeb240..baa2917d 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -1,3 +1,11 @@ +""" +Module for AWS Lambda function implementation in the SeBs framework. + +This module provides the LambdaFunction class, which represents an AWS Lambda +function in the serverless benchmarking suite. It handles AWS-specific attributes +and operations such as ARN, runtime, role, and serialization. +""" + from typing import cast, Optional from sebs.aws.s3 import S3 @@ -6,6 +14,20 @@ class LambdaFunction(Function): + """ + AWS Lambda function implementation for the SeBs framework. + + This class represents an AWS Lambda function in the serverless benchmarking + suite. It extends the base Function class with AWS-specific attributes and + functionality, like resource ARN, role, and optional bucket for code deployment. + + Attributes: + arn: Amazon Resource Name of the Lambda function + role: IAM role ARN used by the function + runtime: Runtime environment for the function (e.g., 'python3.8') + bucket: S3 bucket name where the function code is stored + """ + def __init__( self, name: str, @@ -17,6 +39,19 @@ def __init__( cfg: FunctionConfig, bucket: Optional[str] = None, ): + """ + Initialize an AWS Lambda function. + + Args: + name: Name of the function + benchmark: Name of the benchmark + arn: Amazon Resource Name of the Lambda function + code_package_hash: Hash of the code package + runtime: Runtime environment for the function + role: IAM role ARN used by the function + cfg: Function configuration + bucket: S3 bucket name where the function code is stored + """ super().__init__(benchmark, name, code_package_hash, cfg) self.arn = arn self.role = role @@ -25,9 +60,21 @@ def __init__( @staticmethod def typename() -> str: + """ + Get the type name of this class. + + Returns: + str: The type name + """ return "AWS.LambdaFunction" def serialize(self) -> dict: + """ + Serialize the Lambda function to a dictionary. + + Returns: + dict: Dictionary representation of the Lambda function + """ return { **super().serialize(), "arn": self.arn, @@ -38,6 +85,18 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": + """ + Create a LambdaFunction instance from a cached configuration. 
+ + Args: + cached_config: Dictionary containing the cached function configuration + + Returns: + LambdaFunction: A new instance with the deserialized data + + Raises: + AssertionError: If an unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.aws.triggers import LibraryTrigger, HTTPTrigger @@ -61,6 +120,16 @@ def deserialize(cached_config: dict) -> "LambdaFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: S3): + def code_bucket(self, benchmark: str, storage_client: S3) -> str: + """ + Get the S3 bucket for the function code. + + Args: + benchmark: Name of the benchmark + storage_client: S3 storage client + + Returns: + str: Name of the S3 bucket + """ self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/resources.py b/sebs/aws/resources.py index 5913c392..d29d8f75 100644 --- a/sebs/aws/resources.py +++ b/sebs/aws/resources.py @@ -1,3 +1,13 @@ +"""AWS system resources management for SeBS. + +This module provides the AWSSystemResources class which manages AWS-specific +resources like S3 storage and DynamoDB NoSQL storage within the SeBS framework. +It handles initialization, caching, and provides access to AWS services. + +Key classes: + AWSSystemResources: Main resource manager for AWS services +""" + from typing import cast, Optional from sebs.aws.s3 import S3 @@ -14,12 +24,35 @@ class AWSSystemResources(SystemResources): + """AWS system resources manager for SeBS. + + This class manages AWS-specific resources including S3 storage and DynamoDB + NoSQL storage. It provides a unified interface for accessing AWS services + with proper session management and caching. + + Attributes: + _session: AWS boto3 session for API calls + _logging_handlers: Logging configuration handlers + _storage: S3 storage client instance + _nosql_storage: DynamoDB NoSQL storage client instance + """ + @staticmethod def typename() -> str: + """Get the type name for these resources. + + Returns: + str: The type name 'AWS.SystemResources' + """ return "AWS.SystemResources" @property def config(self) -> AWSConfig: + """Get the AWS configuration. + + Returns: + AWSConfig: AWS-specific configuration + """ return cast(AWSConfig, self._config) def __init__( @@ -28,7 +61,15 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize AWS system resources. + + Args: + config: AWS-specific configuration + cache_client: Cache client for resource caching + docker_client: Docker client for container operations + logger_handlers: Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._session: Optional[boto3.session.Session] = None @@ -36,19 +77,29 @@ def __init__( self._storage: Optional[S3] = None self._nosql_storage: Optional[DynamoDB] = None - def initialize_session(self, session: boto3.session.Session): + def initialize_session(self, session: boto3.session.Session) -> None: + """Initialize the AWS boto3 session. + + Args: + session: Boto3 session to use for AWS API calls + """ self._session = session - """ - Create a client instance for cloud storage. When benchmark and buckets - parameters are passed, then storage is initialized with required number - of buckets. Buckets may be created or retrieved from cache. 
+ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """Get or create S3 storage client. - :param replace_existing: replace existing files in cached buckets? - :return: storage client - """ + Creates a client instance for S3 cloud storage. Storage is initialized + with required buckets that may be created or retrieved from cache. - def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + Args: + replace_existing: Whether to replace existing files in cached buckets + + Returns: + PersistentStorage: S3 storage client instance + + Raises: + AssertionError: If session has not been initialized + """ if not self._storage: assert self._session is not None @@ -68,6 +119,17 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """Get or create DynamoDB NoSQL storage client. + + Creates a client instance for DynamoDB NoSQL storage. The client + is configured with AWS credentials and region from the system config. + + Returns: + NoSQLStorage: DynamoDB NoSQL storage client instance + + Raises: + AssertionError: If session has not been initialized + """ if not self._nosql_storage: assert self._session is not None self.logging.info("Initialize DynamoDB NoSQL instance.") diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 79ca8905..0ba90385 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -1,3 +1,13 @@ +"""AWS S3 storage implementation for SeBS. + +This module provides the S3 class which implements persistent storage functionality +for the Serverless Benchmarking Suite using Amazon S3. It handles bucket creation, +file upload/download operations, and caching for benchmark data storage. + +Key classes: + S3: AWS S3 persistent storage implementation +""" + import os import uuid from typing import List, Optional @@ -10,20 +20,51 @@ class S3(PersistentStorage): + """AWS S3 persistent storage implementation for SeBS. + + This class provides persistent storage functionality using Amazon S3. + It handles bucket creation, file operations, and provides a unified + interface for benchmark data storage and retrieval. + + Attributes: + client: S3 client for AWS API operations + cached: Whether bucket configurations are cached + """ + @staticmethod def typename() -> str: + """Get the type name for this storage system. + + Returns: + str: Type name ('AWS.S3') + """ return "AWS.S3" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage system. + + Returns: + str: Deployment name ('aws') + """ return "aws" @property def replace_existing(self) -> bool: + """Get whether to replace existing files. + + Returns: + bool: True if existing files should be replaced, False otherwise + """ return self._replace_existing @replace_existing.setter - def replace_existing(self, val: bool): + def replace_existing(self, val: bool) -> None: + """Set whether to replace existing files. + + Args: + val: True to replace existing files, False otherwise + """ self._replace_existing = val def __init__( @@ -35,7 +76,18 @@ def __init__( access_key: str, secret_key: str, replace_existing: bool, - ): + ) -> None: + """Initialize S3 persistent storage. 
+ + Args: + session: AWS boto3 session + cache_client: Cache client for storing bucket configurations + resources: Cloud resource configuration + location: AWS region name + access_key: AWS access key ID + secret_key: AWS secret access key + replace_existing: Whether to replace existing files during uploads + """ super().__init__(location, cache_client, resources, replace_existing) self.client = session.client( "s3", @@ -46,11 +98,40 @@ def __init__( self.cached = False def correct_name(self, name: str) -> str: + """No correction is needed for S3 bucket name. + + Args: + name: Original bucket name + + Returns: + str: Corrected bucket name (no changes for S3) + """ return name def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: + """Create an S3 bucket with the specified name. + + Handles the complex S3 bucket creation logic including region-specific + requirements and conflict resolution. + + Args: + name: Desired bucket name + buckets: List of existing buckets to check against + randomize_name: Whether to append a random suffix to ensure uniqueness + + Returns: + str: Name of the created bucket + + Raises: + BucketAlreadyExists: If bucket already exists in the same region + ClientError: If bucket creation fails for other reasons + RuntimeError: If bucket already exists in us-east-1 region + """ + if buckets is None: + buckets = [] + for bucket_name in buckets: if name in bucket_name: self.logging.info( @@ -68,9 +149,11 @@ def _create_bucket( # this is incredible # https://github.com/boto/boto3/issues/125 if self.region != "us-east-1": + from typing import cast, Any + self.client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={"LocationConstraint": self.region}, + CreateBucketConfiguration={"LocationConstraint": cast(Any, self.region)}, ) else: # This is incredible x2 - boto3 will not throw exception if you recreate @@ -98,7 +181,18 @@ def _create_bucket( return bucket_name - def uploader_func(self, path_idx, key, filepath): + def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: + """Upload a file to S3 with caching and replacement logic. + + Handles the upload of benchmark files with appropriate caching behavior: + skips upload if using cached buckets and not replacing existing files, + and we know that the file is already uploaded. + + Args: + path_idx: Index of the input path configuration + key: S3 object key for the file + filepath: Local path to the file to upload + """ # Skip upload when using cached buckets and not updating storage. if self.cached and not self.replace_existing: return @@ -115,22 +209,53 @@ def uploader_func(self, path_idx, key, filepath): self.upload(bucket_name, filepath, key) - def upload(self, bucket_name: str, filepath: str, key: str): + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to S3. + + Args: + bucket_name: Name of the S3 bucket + filepath: Local path to the file to upload + key: S3 object key for the uploaded file + """ self.logging.info("Upload {} to {}".format(filepath, bucket_name)) self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) - def download(self, bucket_name: str, key: str, filepath: str): + def download(self, bucket_name: str, key: str, filepath: str) -> None: + """Download a file from S3. 
+ + Args: + bucket_name: Name of the S3 bucket + key: S3 object key of the file to download + filepath: Local path where the file should be saved + """ self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: + """Check if an S3 bucket exists and is accessible. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if bucket exists and is accessible, False otherwise + """ try: self.client.head_bucket(Bucket=bucket_name) return True except self.client.exceptions.ClientError: return False - def list_bucket(self, bucket_name: str, prefix: str = ""): + def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """List objects in an S3 bucket with optional prefix filtering. + + Args: + bucket_name: Name of the S3 bucket + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of object keys in the bucket + """ objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: @@ -140,17 +265,38 @@ def list_bucket(self, bucket_name: str, prefix: str = ""): return objects def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List S3 buckets with optional name filtering. + + Args: + bucket_name: Optional bucket name pattern to filter by + + Returns: + List[str]: List of bucket names + """ s3_buckets = self.client.list_buckets()["Buckets"] if bucket_name is not None: return [bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"]] else: return [bucket["Name"] for bucket in s3_buckets] - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Remove all objects from an S3 bucket. + + Args: + bucket: Name of the bucket to clean + """ objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore - def remove_bucket(self, bucket: str): + def remove_bucket(self, bucket: str) -> None: + """Delete an S3 bucket. + + Args: + bucket: Name of the bucket to delete + + Note: + The bucket must be empty before it can be deleted + """ self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..4e7e3484 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -1,3 +1,14 @@ +"""AWS trigger implementations for SeBS. + +This module provides trigger implementations for AWS Lambda functions, +including library (direct SDK) triggers and HTTP triggers via API Gateway. +Triggers handle function invocation and result processing. + +Key classes: + LibraryTrigger: Direct Lambda SDK invocation trigger + HTTPTrigger: HTTP API Gateway trigger +""" + import base64 import concurrent.futures import datetime @@ -9,30 +20,80 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + """AWS Lambda library trigger for direct SDK invocation. + + This trigger uses the AWS Lambda SDK to directly invoke Lambda functions. + It provides both synchronous and asynchronous invocation methods with + comprehensive result parsing and error handling. 
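Roughly, the direct invocation path reduces to the boto3 calls below; the function name and payload are hypothetical, and the real trigger additionally records client-side timing and parses the benchmark output:

    import json
    import boto3

    client = boto3.client("lambda", region_name="us-east-1")
    payload = json.dumps({"input": "test"}).encode("utf-8")  # hypothetical benchmark payload

    response = client.invoke(FunctionName="sebs-example-function", Payload=payload)
    if response["StatusCode"] != 200:
        raise RuntimeError("Invocation failed")
    body = json.loads(response["Payload"].read().decode("utf-8"))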
+ + Attributes: + name: Name of the Lambda function + _deployment_client: AWS deployment client for Lambda operations + """ + + def __init__(self, fname: str, deployment_client: Optional[AWS] = None) -> None: + """Initialize the library trigger. + + Args: + fname: Name of the Lambda function + deployment_client: AWS deployment client (can be set later) + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name ('AWS.LibraryTrigger') + """ return "AWS.LibraryTrigger" @property def deployment_client(self) -> AWS: + """Get the AWS deployment client. + + Returns: + AWS: AWS deployment client + + Raises: + AssertionError: If deployment client is not set + """ assert self._deployment_client return self._deployment_client @deployment_client.setter - def deployment_client(self, deployment_client: AWS): + def deployment_client(self, deployment_client: AWS) -> None: + """Set the AWS deployment client. + + Args: + deployment_client: AWS deployment client to set + """ self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + Trigger.TriggerType: LIBRARY trigger type + """ return Trigger.TriggerType.LIBRARY def sync_invoke(self, payload: dict) -> ExecutionResult: + """Synchronously invoke the Lambda function. + + Invokes the Lambda function with the provided payload and waits for + the result. Parses AWS-specific metrics and benchmark output. + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult: Result of the function execution including metrics + """ self.logging.debug(f"Invoke function {self.name}") serialized_payload = json.dumps(payload).encode("utf-8") @@ -67,9 +128,26 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result - def async_invoke(self, payload: dict): + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke the Lambda function. + + Triggers the Lambda function asynchronously without waiting for + the result. Used for fire-and-forget invocations. + + Args: + payload: Dictionary payload to send to the function + + Returns: + concurrent.futures.Future: Future object representing the async invocation + + Raises: + RuntimeError: If the async invocation fails + """ # FIXME: proper return type + self.logging.warning( + "Async invoke for AWS Lambda library trigger does not wait for completion!" + ) serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() ret = client.invoke( @@ -82,44 +160,120 @@ def async_invoke(self, payload: dict): self.logging.error("Async invocation of {} failed!".format(self.name)) self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) raise RuntimeError() - return ret + + # Create a completed future with the result + future: concurrent.futures.Future = concurrent.futures.Future() + future.set_result(ret) + return future def serialize(self) -> dict: + """Serialize the trigger to a dictionary. + + Returns: + dict: Serialized trigger configuration + """ return {"type": "Library", "name": self.name} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize a trigger from a dictionary. 
+ + Args: + obj: Dictionary containing trigger configuration + + Returns: + Trigger: Deserialized LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): - def __init__(self, url: str, api_id: str): + """AWS API Gateway HTTP trigger for Lambda functions. + + This trigger uses HTTP requests to invoke Lambda functions through + AWS API Gateway. It provides both synchronous and asynchronous + invocation methods. + + Attributes: + url: API Gateway endpoint URL + api_id: API Gateway API ID + """ + + def __init__(self, url: str, api_id: str) -> None: + """Initialize the HTTP trigger. + + Args: + url: API Gateway endpoint URL + api_id: API Gateway API ID + """ super().__init__() self.url = url self.api_id = api_id @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name ('AWS.HTTPTrigger') + """ return "AWS.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + Trigger.TriggerType: HTTP trigger type + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """Synchronously invoke the function via HTTP. + + Sends an HTTP request to the API Gateway endpoint and waits + for the response. + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult: Result of the HTTP invocation + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke the function via HTTP. + + Submits the HTTP invocation to a thread pool for asynchronous execution. + Args: + payload: Dictionary payload to send to the function + + Returns: + concurrent.futures.Future: Future object for the async invocation + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize the trigger to a dictionary. + + Returns: + dict: Serialized trigger configuration + """ return {"type": "HTTP", "url": self.url, "api-id": self.api_id} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize a trigger from a dictionary. + + Args: + obj: Dictionary containing trigger configuration + + Returns: + Trigger: Deserialized HTTPTrigger instance + """ return HTTPTrigger(obj["url"], obj["api-id"]) diff --git a/sebs/azure/__init__.py b/sebs/azure/__init__.py index 499b1372..dba97945 100644 --- a/sebs/azure/__init__.py +++ b/sebs/azure/__init__.py @@ -1,3 +1,41 @@ +"""Azure integration package for SeBS benchmarking. + +This package provides comprehensive Azure integration for the Serverless +Benchmarking Suite (SeBS). It includes all necessary components for deploying, +managing, and benchmarking serverless functions on Microsoft Azure. 
+ +Main Components: + Azure: Main system class for Azure platform integration + AzureFunction: Azure Function representation and management + AzureConfig: Configuration management for Azure credentials and resources + BlobStorage: Azure Blob Storage integration for data management + +The package handles: + - Azure Functions deployment and lifecycle management + - Azure Storage integration for benchmark data + - CosmosDB support for NoSQL benchmarks + - Resource group and subscription management + - Azure CLI integration via Docker containers + - Performance metrics collection via Application Insights + +Example: + Basic usage for Azure benchmarking: + :: + + from sebs.azure import Azure, AzureConfig + + # Load configuration + config = AzureConfig.deserialize(config_dict, cache, handlers) + + # Initialize Azure system + azure = Azure(sebs_config, config, cache, docker_client, handlers) + azure.initialize() + + # Deploy and benchmark functions + function = azure.create_function(code_package, func_name, False, "") + result = function.invoke(payload) +""" + from .azure import Azure # noqa from .function import AzureFunction # noqa from .config import AzureConfig # noqa diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724..b1b20a27 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -1,3 +1,35 @@ +"""Azure serverless platform implementation for SeBS benchmarking. + +This module provides the Azure implementation of the SeBS serverless +benchmarking system. It handles Azure Functions deployment, resource +management, code packaging, and benchmark execution on Microsoft Azure. + +Key features: + - Azure Functions deployment and management + - Azure Storage integration for code and data + - CosmosDB support for NoSQL benchmarks + - HTTP trigger configuration and invocation + - Performance metrics collection via Application Insights + - Resource lifecycle management + +The main class Azure extends the base System class to provide Azure-specific +functionality for serverless function benchmarking. + +Example: + Basic usage for Azure benchmarking:: + + from sebs.azure.azure import Azure + from sebs.azure.config import AzureConfig + + # Initialize Azure system with configuration + azure_system = Azure(sebs_config, azure_config, cache, docker_client, handlers) + azure_system.initialize() + + # Deploy and benchmark functions + function = azure_system.create_function(code_package, func_name, False, "") + result = function.invoke(payload) +""" + import datetime import json import re @@ -27,28 +59,62 @@ class Azure(System): + """Azure serverless platform implementation. + + This class implements the Azure-specific functionality for the SeBS + benchmarking suite. It handles Azure Functions deployment, resource + management, and benchmark execution on Microsoft Azure platform. + + Attributes: + logs_client: Azure logs client (currently unused) + storage: BlobStorage instance for Azure Blob Storage operations + cached: Flag indicating if resources are cached + _config: Azure configuration containing credentials and resources + AZURE_RUNTIMES: Mapping of language names to Azure runtime identifiers + """ + logs_client = None storage: BlobStorage - cached = False + cached: bool = False _config: AzureConfig # runtime mapping AZURE_RUNTIMES = {"python": "python", "nodejs": "node"} @staticmethod - def name(): + def name() -> str: + """Get the platform name. + + Returns: + Platform name 'azure'. + """ return "azure" @property def config(self) -> AzureConfig: + """Get Azure configuration. 
+ + Returns: + Azure configuration containing credentials and resources. + """ return self._config @staticmethod def function_type() -> Type[Function]: + """Get the function type for Azure. + + Returns: + AzureFunction class type. + """ return AzureFunction @property def cli_instance(self) -> AzureCLI: + """Get Azure CLI instance. + + Returns: + Azure CLI instance for executing Azure commands. + """ return cast(AzureSystemResources, self._system_resources).cli_instance def __init__( @@ -58,7 +124,16 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize Azure system. + + Args: + sebs_config: SeBS configuration settings + config: Azure-specific configuration + cache_client: Cache for storing function and resource data + docker_client: Docker client for container operations + logger_handlers: Logging handlers for output management + """ super().__init__( sebs_config, cache_client, @@ -68,26 +143,40 @@ def __init__( self.logging_handlers = logger_handlers self._config = config - """ - Start the Docker container running Azure CLI tools. - """ - def initialize( self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None, - ): + ) -> None: + """Initialize Azure system and start CLI container. + + Initializes Azure resources and allocates shared resources like + data storage account. Starts the Docker container with Azure CLI tools. + + Args: + config: Additional configuration parameters + resource_prefix: Optional prefix for resource naming + """ self.initialize_resources(select_prefix=resource_prefix) self.allocate_shared_resource() - def shutdown(self): + def shutdown(self) -> None: + """Shutdown Azure system and cleanup resources. + + Stops the Azure CLI container and performs cleanup of system resources. + """ cast(AzureSystemResources, self._system_resources).shutdown() super().shutdown() def find_deployments(self) -> List[str]: + """Find existing SeBS deployments by scanning resource groups. - """ - Look for duplicated resource groups. + Looks for Azure resource groups matching the SeBS naming pattern + - sebs_resource_group_(.*) - to identify existing deployments + that can be reused. + + Returns: + List of deployment identifiers found in resource groups. """ resource_groups = self.config.resources.list_resource_groups(self.cli_instance) deployments = [] @@ -99,22 +188,15 @@ def find_deployments(self) -> List[str]: return deployments - """ - Allow multiple deployment clients share the same settings. - Not an ideal situation, but makes regression testing much simpler. - """ + def allocate_shared_resource(self) -> None: + """Allocate shared data storage account. - def allocate_shared_resource(self): + Creates or retrieves the shared data storage account used for + benchmark input/output data. This allows multiple deployment + clients to share the same storage, simplifying regression testing. + """ self.config.resources.data_storage_account(self.cli_instance) - # Directory structure - # handler - # - source files - # - Azure wrappers - handler, storage - # - additional resources - # - function.json - # host.json - # requirements.txt/package.json def package_code( self, directory: str, @@ -125,6 +207,30 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package function code for Azure Functions deployment. + + Creates the proper directory structure and configuration files + required for Azure Functions deployment. 
The structure includes: + - handler/ directory with source files and Azure wrappers + - function.json with trigger and binding configuration + - host.json with runtime configuration + - requirements.txt or package.json with dependencies + + Args: + directory: Directory containing the function code + language_name: Programming language (python, nodejs) + language_version: Language runtime version + architecture: Target architecture (currently unused) + benchmark: Name of the benchmark + is_cached: Whether the package is from cache + container_deployment: Whether to use container deployment + + Returns: + Tuple of (directory_path, code_size_bytes, container_uri) + + Raises: + NotImplementedError: If container deployment is requested. + """ container_uri = "" @@ -187,6 +293,25 @@ def publish_function( container_dest: str, repeat_on_failure: bool = False, ) -> str: + """Publish function code to Azure Functions. + + Deploys the packaged function code to Azure Functions using the + Azure Functions CLI tools. Handles retries and URL extraction. + Will repeat on failure, which is useful to handle delays in + Azure cache updates - it can take between 30 and 60 seconds. + + Args: + function: Function instance to publish + code_package: Benchmark code package to deploy + container_dest: Destination path in the CLI container + repeat_on_failure: Whether to retry on failure + + Returns: + URL for invoking the published function. + + Raises: + RuntimeError: If function publication fails or URL cannot be found. + """ success = False url = "" self.logging.info("Attempting publish of function {}".format(function.name)) @@ -199,8 +324,8 @@ def publish_function( ) ) url = "" - for line in ret.split(b"\n"): - line = line.decode("utf-8") + ret_str = ret.decode("utf-8") + for line in ret_str.split("\n"): if "Invoke url:" in line: url = line.split("Invoke url:")[1].strip() break @@ -245,24 +370,29 @@ def publish_function( raise e return url - """ - Publish function code on Azure. - Boolean flag enables repeating publish operation until it succeeds. - Useful for publish immediately after function creation where it might - take from 30-60 seconds for all Azure caches to be updated. - - :param name: function name - :param repeat_on_failure: keep repeating if command fails on unknown name. - :return: URL to reach HTTP-triggered function - """ - def update_function( self, function: Function, code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update existing Azure Function with new code. + + Updates an existing Azure Function with new code package, + including environment variables and function configuration. + It also ensures an HTTP trigger is correctly associated with + the function's URL. + + Args: + function: Function instance to update + code_package: New benchmark code package + container_deployment: Whether using container deployment + container_uri: Container URI (unused for Azure) + + Raises: + NotImplementedError: If container deployment is requested. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -293,7 +423,23 @@ def update_function( trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) - def update_envs(self, function: Function, code_package: Benchmark, env_variables: dict = {}): + def update_envs( + self, function: Function, code_package: Benchmark, env_variables: dict = {} + ) -> None: + """Update environment variables for Azure Function. 
+ + Sets up environment variables required for benchmark execution, + including storage connection strings and NoSQL database credentials. + Preserves existing environment variables while adding new ones. + + Args: + function: Function instance to update + code_package: Benchmark code package with requirements + env_variables: Additional environment variables to set + + Raises: + RuntimeError: If environment variable operations fail. + """ envs = {} if code_package.uses_nosql: @@ -377,13 +523,33 @@ def update_envs(self, function: Function, code_package: Benchmark, env_variables self.logging.error(e) raise e - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """Update Azure Function configuration. + + Currently not implemented for Azure Functions as memory and timeout + configuration is handled at the consumption plan level. + + Args: + function: Function instance to configure + code_package: Benchmark code package with requirements + """ # FIXME: this does nothing currently - we don't specify timeout self.logging.warning( "Updating function's memory and timeout configuration is not supported." ) def _mount_function_code(self, code_package: Benchmark) -> str: + """Mount function code package in Azure CLI container. + + Uploads the function code package to a temporary location in the + Azure CLI container for deployment operations. + + Args: + code_package: Benchmark code package to mount + + Returns: + Path to mounted code in the CLI container. + """ dest = os.path.join("/mnt", "function", uuid.uuid4().hex) self.cli_instance.upload_package(code_package.code_location, dest) return dest @@ -391,8 +557,18 @@ def _mount_function_code(self, code_package: Benchmark) -> str: def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: - """ - Functionapp names must be globally unique in Azure. + """Generate default function name for Azure. + + Creates a globally unique function name based on resource ID, + benchmark name, language, and version. Function app names must + be globally unique across all of Azure. + + Args: + code_package: Benchmark code package + resources: Optional resources (unused) + + Returns: + Globally unique function name for Azure. """ func_name = ( "sebs-{}-{}-{}-{}".format( @@ -413,6 +589,25 @@ def create_function( container_deployment: bool, container_uri: str, ) -> AzureFunction: + """Create new Azure Function. + + Creates a new Azure Function App and deploys the provided code package. + Handles function app creation, storage account allocation, and initial + deployment with proper configuration. + + Args: + code_package: Benchmark code package to deploy + func_name: Name for the Azure Function App + container_deployment: Whether to use container deployment + container_uri: Container URI (unused for Azure) + + Returns: + AzureFunction instance representing the created function. + + Raises: + NotImplementedError: If container deployment is requested. + RuntimeError: If function creation fails. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -496,8 +691,15 @@ def create_function( ) return function - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Initialize cached function with current configuration. 
+ + Sets up a cached function with current data storage account + and logging handlers for all triggers. + Args: + function: Function instance loaded from cache + """ data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -511,13 +713,26 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: Dict[str, dict], - ): + ) -> None: + """Download execution metrics from Azure Application Insights. + + Retrieves performance metrics for function executions from Azure + Application Insights and updates the execution results with + provider-specific timing information. + + Args: + function_name: Name of the Azure Function + start_time: Start timestamp for metrics collection + end_time: End timestamp for metrics collection + requests: Dictionary of execution results to update + metrics: Additional metrics dictionary (unused) + """ self.cli_instance.install_insights() resource_group = self.config.resources.resource_group(self.cli_instance) # Avoid warnings in the next step - ret = self.cli_instance.execute( + self.cli_instance.execute( "az feature register --name AIWorkspacePreview " "--namespace microsoft.insights" ) app_id_query = self.cli_instance.execute( @@ -549,7 +764,7 @@ def download_metrics( invocations_to_process = set(requests.keys()) # while len(invocations_processed) < len(requests.keys()): self.logging.info("Azure: Running App Insights query.") - ret = self.cli_instance.execute( + ret_bytes = self.cli_instance.execute( ( 'az monitor app-insights query --app {} --analytics-query "{}" ' "--start-time {} {} --end-time {} {}" @@ -561,11 +776,12 @@ def download_metrics( end_time_str, timezone_str, ) - ).decode("utf-8") - ret = json.loads(ret) - ret = ret["tables"][0] + ) + ret_str = ret_bytes.decode("utf-8") + json_data = json.loads(ret_str) + table_data = json_data["tables"][0] # time is last, invocation is second to last - for request in ret["rows"]: + for request in table_data["rows"]: invocation_id = request[-2] # might happen that we get invocation from another experiment if invocation_id not in requests: @@ -584,14 +800,31 @@ def download_metrics( # TODO: query performance counters for mem - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> None: + """Enforce cold start for a single function. + + Updates environment variable to force cold start behavior. + Args: + function: Function instance to update + code_package: Benchmark code package + """ self.update_envs(function, code_package, {"ForceColdStart": str(self.cold_start_counter)}) # FIXME: is this sufficient to enforce cold starts? # self.update_function(function, code_package, False, "") - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for multiple functions. + + Forces cold start behavior for all provided functions by updating + environment variables and waiting for changes to propagate: + sleep is added to allow changes to propagate. 
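Condensed, the cold-start enforcement described above amounts to the steps below (a sketch of the same flow, not a separate mechanism; whether a settings change is always sufficient remains an open FIXME in the code):

    import time

    def force_cold_starts(azure_system, functions, code_package) -> None:
        # Bump a counter and publish it as an app setting on every function;
        # the settings change should recycle workers so the next call is cold.
        azure_system.cold_start_counter += 1
        for func in functions:
            azure_system.update_envs(
                func, code_package, {"ForceColdStart": str(azure_system.cold_start_counter)}
            )
        time.sleep(20)  # allow the new settings to propagate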
+ + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark code package + """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -599,72 +832,17 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) - """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. - """ - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - raise NotImplementedError() + """Create trigger for Azure Function. + + Currently not implemented as HTTP triggers are automatically + created for each function during deployment. + Args: + function: Function to create trigger for + trigger_type: Type of trigger to create -# -# def create_azure_function(self, fname, config): -# -# # create function name -# region = self.config["config"]["region"] -# # only hyphens are allowed -# # and name needs to be globally unique -# func_name = fname.replace(".", "-").replace("_", "-") -# -# # create function app -# self.cli_instance.execute( -# ( -# "az functionapp create --resource-group {} " -# "--os-type Linux --consumption-plan-location {} " -# "--runtime {} --runtime-version {} --name {} " -# "--storage-account {}" -# ).format( -# self.resource_group_name, -# region, -# self.AZURE_RUNTIMES[self.language], -# self.config["config"]["runtime"][self.language], -# func_name, -# self.storage_account_name, -# ) -# ) -# logging.info("Created function app {}".format(func_name)) -# return func_name -# -# init = False -# -# def create_function_copies( -# self, -# function_names: List[str], -# code_package: Benchmark, -# experiment_config: dict, -# ): -# -# if not self.init: -# code_location = code_package.code_location -# # package = self.package_code(code_location, code_package.benchmark) -# # code_size = code_package.code_size -# # Restart Docker instance to make sure code package is mounted -# self.start(code_location, restart=True) -# self.storage_account() -# self.resource_group() -# self.init = True -# -# # names = [] -# # for fname in function_names: -# # names.append(self.create_azure_function(fname, experiment_config)) -# names = function_names -# -# # time.sleep(30) -# urls = [] -# for fname in function_names: -# url = self.publish_function(fname, repeat_on_failure=True) -# urls.append(url) -# logging.info("Published function app {} with URL {}".format(fname, url)) -# -# return names, urls + Raises: + NotImplementedError: Trigger creation is not supported. + """ + raise NotImplementedError() diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index 079e72d3..b682fa46 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -1,3 +1,29 @@ +"""Azure Blob Storage implementation for SeBS benchmarking. + +This module provides Azure Blob Storage integration for the SeBS benchmarking +suite. It handles container management, file uploads/downloads, and storage +operations required for serverless function benchmarking. + +The BlobStorage class implements the PersistentStorage interface to provide +Azure-specific storage operations including container creation, file management, +and cleanup operations. 
+ +Example: + Basic usage for Azure Blob Storage: + + :: + + from sebs.azure.blob_storage import BlobStorage + + # Initialize with connection string + storage = BlobStorage(region, cache, resources, connection_string, False) + + # Upload benchmark data + storage.upload(container_name, filepath, key) + # Download results + storage.download(container_name, key, local_filepath) +""" + import os import uuid from typing import List, Optional @@ -10,12 +36,33 @@ class BlobStorage(PersistentStorage): + """Azure Blob Storage implementation for benchmark data management. + + This class provides Azure Blob Storage operations for storing and retrieving + benchmark input data, function outputs, and temporary files. It manages + containers (equivalent to S3 buckets) and handles file operations with + proper error handling and logging. + + Attributes: + client: Azure Blob Service client for storage operations + """ + @staticmethod def typename() -> str: + """Get the storage type name. + + Returns: + Storage type identifier for Azure Blob Storage. + """ return "Azure.BlobStorage" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + Platform name 'azure'. + """ return "azure" def __init__( @@ -25,17 +72,39 @@ def __init__( resources: Resources, conn_string: str, replace_existing: bool, - ): + ) -> None: + """Initialize Azure Blob Storage. + + Args: + region: Azure region for storage operations + cache_client: Cache for storing storage configuration + resources: Resources configuration + conn_string: Azure Storage connection string + replace_existing: Whether to replace existing files + """ super().__init__(region, cache_client, resources, replace_existing) self.client: BlobServiceClient = BlobServiceClient.from_connection_string(conn_string) - """ - Internal implementation of creating a new container. - """ - def _create_bucket( - self, name: str, containers: List[str] = [], randomize_name: bool = False + self, name: str, containers: Optional[List[str]] = None, randomize_name: bool = False ) -> str: + """Create new Azure Blob Storage container. + + Internal implementation for creating containers with optional + name randomization and existence checking. + + Args: + name: Base name for the container + containers: List of existing containers to check + randomize_name: Whether to append random suffix to name + + Returns: + Name of the created or existing container. + """ + + if containers is None: + containers = [] + for c in containers: if name in c: self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) @@ -47,14 +116,31 @@ def _create_bucket( self.logging.info("Created container {}".format(name)) return name - """ - Azure does not allow dots in container names. - """ - def correct_name(self, name: str) -> str: + """Correct container name for Azure requirements. + + Azure Blob Storage does not allow dots in container names, + so they are replaced with hyphens. + + Args: + name: Original container name + + Returns: + Corrected container name with dots replaced by hyphens. + """ return name.replace(".", "-") def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List Azure Blob Storage containers. + + Lists all containers or those matching a prefix. + + Args: + bucket_name: Optional prefix to filter container names + + Returns: + List of container names. 
+ """ if bucket_name is not None: return [ container["name"] @@ -63,7 +149,17 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: else: return [container["name"] for container in self.client.list_containers()] - def uploader_func(self, container_idx, file, filepath): + def uploader_func(self, container_idx: int, file: str, filepath: str) -> None: + """Upload file to Azure Blob Storage container. + + Uploads a file to the specified container with proper path handling + and duplicate checking. + + Args: + container_idx: Index of the container for file organization + file: Name of the file being uploaded + filepath: Local path to the file to upload + """ # Skip upload when using cached containers if self.cached and not self.replace_existing: return @@ -82,37 +178,60 @@ def uploader_func(self, container_idx, file, filepath): client.upload_blob(data=file_data, overwrite=True) self.logging.info("Upload {} to {}".format(filepath, container_name)) - """ - Download file from bucket. + def download(self, container_name: str, key: str, filepath: str) -> None: + """Download file from Azure Blob Storage. - :param container_name: - :param file: - :param filepath: - """ + Downloads a blob from the specified container to a local file. - def download(self, container_name: str, key: str, filepath: str): + Args: + container_name: Name of the Azure Blob Storage container + key: Blob key/name in the container + filepath: Local file path to save the downloaded content + """ self.logging.info("Download {}:{} to {}".format(container_name, key, filepath)) client = self.client.get_blob_client(container_name, key) with open(filepath, "wb") as download_file: download_file.write(client.download_blob().readall()) - def upload(self, container_name: str, filepath: str, key: str): + def upload(self, container_name: str, filepath: str, key: str) -> None: + """Upload file to Azure Blob Storage. + + Uploads a local file to the specified container with the given key. + + Args: + container_name: Name of the Azure Blob Storage container + filepath: Local file path to upload + key: Blob key/name in the container + """ self.logging.info("Upload {} to {}".format(filepath, container_name)) client = self.client.get_blob_client(container_name, key) with open(filepath, "rb") as upload_file: client.upload_blob(upload_file) # type: ignore def exists_bucket(self, container: str) -> bool: + """Check if Azure Blob Storage container exists. + + Args: + container: Name of the container to check + + Returns: + True if container exists, False otherwise. + """ return self.client.get_container_client(container).exists() - """ - Return list of files in a container. + def list_bucket(self, container: str, prefix: str = "") -> List[str]: + """List files in Azure Blob Storage container. - :param container: - :return: list of file names. empty if container empty - """ + Returns list of blob names in the specified container, + optionally filtered by prefix. - def list_bucket(self, container: str, prefix: str = ""): + Args: + container: Name of the container to list + prefix: Optional prefix to filter blob names + + Returns: + List of blob names. Empty list if container is empty. + """ objects = list( map( lambda x: x["name"], @@ -121,12 +240,27 @@ def list_bucket(self, container: str, prefix: str = ""): ) return [x for x in objects if prefix in x] - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Clean all blobs from Azure Blob Storage container. 
+ + Removes all blobs from the specified container but keeps + the container itself. + + Args: + bucket: Name of the container to clean + """ self.logging.info("Clean output container {}".format(bucket)) container_client = self.client.get_container_client(bucket) blobs = list(map(lambda x: x["name"], container_client.list_blobs())) if len(blobs) > 0: container_client.delete_blobs(*blobs) - def remove_bucket(self, bucket: str): + def remove_bucket(self, bucket: str) -> None: + """Remove Azure Blob Storage container. + + Deletes the entire container and all its contents. + + Args: + bucket: Name of the container to remove + """ self.client.get_container_client(bucket).delete_container() diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index b875ee02..b7648202 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -1,3 +1,32 @@ +"""Azure CLI Docker container management for SeBS benchmarking. + +This module provides a wrapper around the Azure CLI running in a Docker container. +It handles container lifecycle, command execution, file uploads, and Azure-specific +operations required for serverless function deployment and management. + +The AzureCLI class manages a Docker container with Azure CLI tools and provides +methods for executing Azure commands, uploading function packages, and handling +authentication. + +Example: + Basic usage for Azure CLI operations: + + :: + + from sebs.azure.cli import AzureCLI + + # Initialize CLI container + cli = AzureCLI(system_config, docker_client) + + # Login to Azure + cli.login(app_id, tenant, password) + # Execute Azure CLI commands + result = cli.execute("az group list") + + # Upload function package + cli.upload_package(local_dir, container_dest) +""" + import io import logging import os @@ -10,8 +39,30 @@ class AzureCLI(LoggingBase): - def __init__(self, system_config: SeBSConfig, docker_client: docker.client): + """Azure CLI Docker container wrapper. + + This class manages a Docker container running Azure CLI tools and provides + methods for executing Azure commands, handling authentication, and managing + file transfers for serverless function deployment. + Attributes: + docker_instance: Docker container running Azure CLI + _insights_installed: Flag indicating if Application Insights extension is installed + """ + + def __init__(self, system_config: SeBSConfig, docker_client: docker.client) -> None: + """Initialize Azure CLI container. + + Creates and starts a Docker container with Azure CLI tools installed. + Handles image pulling if not available locally. + + Args: + system_config: SeBS system configuration + docker_client: Docker client for container operations + + Raises: + RuntimeError: If Docker image pull fails. + """ super().__init__() repo_name = system_config.docker_repository() @@ -40,7 +91,7 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): detach=True, tty=True, ) - self._insights_installed = False + self._insights_installed: bool = False self.logging.info(f"Started Azure CLI container: {self.docker_instance.id}.") while True: try: @@ -52,14 +103,28 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): @staticmethod def typename() -> str: + """Get the CLI type name. + + Returns: + Type identifier for Azure CLI. + """ return "Azure.CLI" - """ - Execute the given command in Azure CLI. - Throws an exception on failure (commands are expected to execute succesfully). - """ + def execute(self, cmd: str) -> bytes: + """Execute Azure CLI command in Docker container. 
+ + Executes the given command in the Azure CLI container and returns + the output. Raises an exception if the command fails. + + Args: + cmd: Azure CLI command to execute - def execute(self, cmd: str): + Returns: + Command output as bytes. + + Raises: + RuntimeError: If command execution fails. + """ exit_code, out = self.docker_instance.exec_run(cmd, user="docker_user") if exit_code != 0: raise RuntimeError( @@ -69,11 +134,20 @@ def execute(self, cmd: str): ) return out - """ - Run azure login command on Docker instance. - """ - def login(self, appId: str, tenant: str, password: str) -> bytes: + """Login to Azure using service principal credentials. + + Authenticates with Azure using service principal credentials + within the Docker container. + + Args: + appId: Azure application (client) ID + tenant: Azure tenant (directory) ID + password: Azure client secret + + Returns: + Login command output as bytes. + """ result = self.execute( "az login -u {0} --service-principal --tenant {1} -p {2}".format( appId, @@ -84,38 +158,50 @@ def login(self, appId: str, tenant: str, password: str) -> bytes: self.logging.info("Azure login succesful") return result - def upload_package(self, directory: str, dest: str): + def upload_package(self, directory: str, dest: str) -> None: + """Upload function package to Docker container. - """ - This is not an efficient and memory-intensive implementation. - So far, we didn't have very large functions that require many gigabytes. + Creates a compressed archive of the function package and uploads + it to the specified destination in the Docker container. - Since docker-py does not support a straightforward copy, and we can't - put_archive in chunks. + Note: + This implementation loads the entire archive into memory, + This is an inefficient and memory-intensive implementation. + So far, we didn't have very large functions that require many gigabytes. + docker-py does not support a straightforward copy and we can't + call put_archive with chunks. - If we end up having problems because of the archive size, there are two - potential solutions: - (1) manually call docker cp and decompress - (2) commit the docker container and restart with a new mount volume. + For large packages, there are two potential solutions: + (1) manually call docker cp and decompress + (2) commit the docker container and restart with a new mounted volume. + + Args: + directory: Local directory containing function package + dest: Destination path in the Docker container """ handle = io.BytesIO() with tarfile.open(fileobj=handle, mode="w:gz") as tar: for f in os.listdir(directory): tar.add(os.path.join(directory, f), arcname=f) - # shutil.make_archive(, 'zip', directory) # move to the beginning of memory before writing handle.seek(0) self.execute("mkdir -p {}".format(dest)) self.docker_instance.put_archive(path=dest, data=handle.read()) - def install_insights(self): + def install_insights(self) -> None: + """Install Azure Application Insights CLI extension. + + Installs the Application Insights extension for Azure CLI + if not already installed. Required for metrics collection. + """ if not self._insights_installed: self.execute("az extension add --name application-insights") + self._insights_installed = True - """ - Shutdowns Docker instance. - """ + def shutdown(self) -> None: + """Shutdown Azure CLI Docker container. - def shutdown(self): + Stops and removes the Docker container running Azure CLI tools. 
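The in-memory archive technique described for upload_package can be illustrated with a short standalone sketch; 'container' stands for an already running docker-py Container, such as the one held by AzureCLI, and both paths are placeholders::

    import io
    import os
    import tarfile

    directory = "/tmp/function_package"   # local build directory (placeholder)
    dest = "/mnt/function"                # destination inside the container (placeholder)

    # Build a gzip-compressed tar archive entirely in memory.
    handle = io.BytesIO()
    with tarfile.open(fileobj=handle, mode="w:gz") as tar:
        for f in os.listdir(directory):
            tar.add(os.path.join(directory, f), arcname=f)

    handle.seek(0)  # rewind the in-memory archive before reading it out
    container.exec_run("mkdir -p {}".format(dest))
    container.put_archive(path=dest, data=handle.read())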
+ """ self.logging.info("Stopping Azure manage Docker instance") self.docker_instance.stop() diff --git a/sebs/azure/cloud_resources.py b/sebs/azure/cloud_resources.py index e0d2a1dd..5476c552 100644 --- a/sebs/azure/cloud_resources.py +++ b/sebs/azure/cloud_resources.py @@ -1,29 +1,64 @@ +"""Azure cloud resource management for SeBS. + +This module manages deployed special resources in Azure cloud, particularly +CosmosDB accounts that require special handling for authentication and +configuration management. +""" + import json -from typing import Optional +from typing import Dict, Optional from sebs.azure.cli import AzureCLI -""" - Keep a list of deployed special resources in Azure cloud. - Currently, we have here CosmosDB accounts that require special handling. -""" +class CosmosDBAccount: + """Azure CosmosDB account configuration and management. + Manages CosmosDB account information including account name, endpoint URL, + and authentication credentials. Provides methods for querying account + details from Azure and serialization for caching. + + Attributes: + _account_name (str): Name of the CosmosDB account + _url (str): Document endpoint URL for the account + _credential (str): Primary master key for authentication + """ -class CosmosDBAccount: @property def account_name(self) -> str: + """Get the CosmosDB account name. + + Returns: + str: The name of the CosmosDB account. + """ return self._account_name @property def url(self) -> str: + """Get the CosmosDB document endpoint URL. + + Returns: + str: The document endpoint URL for the CosmosDB account. + """ return self._url @property def credential(self) -> str: + """Get the CosmosDB authentication credential. + + Returns: + str: The primary master key for CosmosDB authentication. + """ return self._credential - def __init__(self, account_name: str, url: str, credential: str): + def __init__(self, account_name: str, url: str, credential: str) -> None: + """Initialize CosmosDB account configuration. + + Args: + account_name (str): Name of the CosmosDB account + url (str): Document endpoint URL for the account + credential (str): Primary master key for authentication + """ super().__init__() self._account_name = account_name self._url = url @@ -31,13 +66,36 @@ def __init__(self, account_name: str, url: str, credential: str): @staticmethod def from_cache(account_name: str, url: str, credential: str) -> "CosmosDBAccount": + """Create CosmosDB account instance from cached data. + + Args: + account_name (str): Name of the CosmosDB account + url (str): Document endpoint URL for the account + credential (str): Primary master key for authentication + + Returns: + CosmosDBAccount: New instance with provided configuration. + """ return CosmosDBAccount(account_name, url, credential) @staticmethod def from_allocation( - account_name: str, resource_group: str, cli_instance: AzureCLI, url: Optional[str] + account_name: str, resource_group: str, cli_instance: AzureCLI, url: Optional[str] = None ) -> "CosmosDBAccount": + """Create CosmosDB account instance by querying Azure. + + Queries Azure CLI to retrieve account configuration including + endpoint URL and authentication credentials. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + url (Optional[str]): Pre-known URL, if None will query from Azure + + Returns: + CosmosDBAccount: New instance with queried configuration. 
+ """ if url is None: url = CosmosDBAccount.query_url( account_name, @@ -55,27 +113,67 @@ def from_allocation( @staticmethod def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: + """Query CosmosDB account endpoint URL from Azure. + + Uses Azure CLI to retrieve the document endpoint URL for the + specified CosmosDB account. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + + Returns: + str: The document endpoint URL for the CosmosDB account. + + Raises: + RuntimeError: If Azure CLI command fails. + KeyError: If the expected response structure is not found. + """ # Find the endpoint URL ret = cli_instance.execute( f" az cosmosdb show --name {account_name} " f" --resource-group {resource_group} " ) - ret = json.loads(ret.decode("utf-8")) - return ret["documentEndpoint"] + ret_dct = json.loads(ret.decode("utf-8")) + return ret_dct["documentEndpoint"] @staticmethod def query_credentials(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: + """Query CosmosDB account authentication credentials from Azure. + + Uses Azure CLI to retrieve the primary master key for the + specified CosmosDB account. + Args: + account_name (str): Name of the CosmosDB account + resource_group (str): Azure resource group containing the account + cli_instance (AzureCLI): Azure CLI instance for executing commands + + Returns: + str: The primary master key for CosmosDB authentication. + + Raises: + RuntimeError: If Azure CLI command fails. + KeyError: If the expected response structure is not found. + """ # Read the master key to access CosmosDB account ret = cli_instance.execute( f" az cosmosdb keys list --name {account_name} " f" --resource-group {resource_group} " ) - ret = json.loads(ret.decode("utf-8")) - credential = ret["primaryMasterKey"] + ret_dct = json.loads(ret.decode("utf-8")) + credential = ret_dct["primaryMasterKey"] return credential - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """Serialize CosmosDB account configuration to dictionary. + + Returns: + Dict[str, str]: Dictionary containing account configuration with keys: + - account_name: The CosmosDB account name + - url: The document endpoint URL + - credential: The primary master key + """ return { "account_name": self._account_name, "url": self._url, @@ -83,5 +181,17 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(obj: dict) -> "CosmosDBAccount": + def deserialize(obj: Dict[str, str]) -> "CosmosDBAccount": + """Deserialize CosmosDB account configuration from dictionary. + + Args: + obj (Dict[str, str]): Dictionary containing account configuration + with required keys: account_name, url, credential + + Returns: + CosmosDBAccount: New instance with deserialized configuration. + + Raises: + KeyError: If required keys are missing from the dictionary. + """ return CosmosDBAccount.from_cache(obj["account_name"], obj["url"], obj["credential"]) diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 9aef0d8c..5e20ea75 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -1,3 +1,16 @@ +"""Configuration management for Azure serverless benchmarking. + +This module provides configuration classes for Azure resources, credentials, +and deployment settings. It handles Azure-specific configuration including +service principal authentication, resource group management, storage accounts, +and CosmosDB setup. 
+ +Key classes: + AzureCredentials: Manages Azure service principal authentication + AzureResources: Manages Azure resource allocation and lifecycle + AzureConfig: Combines credentials and resources for Azure deployment +""" + import json import logging import os @@ -5,7 +18,6 @@ import uuid from typing import cast, Dict, List, Optional - from sebs.azure.cli import AzureCLI from sebs.azure.cloud_resources import CosmosDBAccount from sebs.cache import Cache @@ -14,14 +26,35 @@ class AzureCredentials(Credentials): + """Azure service principal credentials for authentication. + + This class manages Azure service principal credentials required for + authenticating with Azure services. It handles app ID, tenant ID, + password, and subscription ID validation and caching. + + Attributes: + _appId: Azure application (client) ID + _tenant: Azure tenant (directory) ID + _password: Azure client secret + _subscription_id: Azure subscription ID (optional) + """ _appId: str _tenant: str _password: str + _subscription_id: Optional[str] def __init__( self, appId: str, tenant: str, password: str, subscription_id: Optional[str] = None - ): + ) -> None: + """Initialize Azure credentials. + + Args: + appId: Azure application (client) ID + tenant: Azure tenant (directory) ID + password: Azure client secret + subscription_id: Azure subscription ID (optional) + """ super().__init__() self._appId = appId self._tenant = tenant @@ -30,24 +63,54 @@ def __init__( @property def appId(self) -> str: + """Get the Azure application (client) ID. + + Returns: + Azure application ID string. + """ return self._appId @property def tenant(self) -> str: + """Get the Azure tenant (directory) ID. + + Returns: + Azure tenant ID string. + """ return self._tenant @property def password(self) -> str: + """Get the Azure client secret. + + Returns: + Azure client secret string. + """ return self._password @property def subscription_id(self) -> str: + """Get the Azure subscription ID. + + Returns: + Azure subscription ID string. + + Raises: + AssertionError: If subscription ID is not set. + """ assert self._subscription_id is not None return self._subscription_id @subscription_id.setter - def subscription_id(self, subscription_id: str): + def subscription_id(self, subscription_id: str) -> None: + """Set the Azure subscription ID with validation. + + Args: + subscription_id: Azure subscription ID to set + Raises: + RuntimeError: If provided subscription ID conflicts with cached value. + """ if self._subscription_id is not None and subscription_id != self._subscription_id: self.logging.error( f"The subscription id {subscription_id} from provided " @@ -64,15 +127,44 @@ def subscription_id(self, subscription_id: str): @property def has_subscription_id(self) -> bool: + """Check if subscription ID is set. + + Returns: + True if subscription ID is set, False otherwise. + """ return self._subscription_id is not None @staticmethod def initialize(dct: dict, subscription_id: Optional[str]) -> "AzureCredentials": + """Initialize credentials from dictionary. + + Args: + dct: Dictionary containing credential information + subscription_id: Optional subscription ID to set + + Returns: + New AzureCredentials instance. + """ return AzureCredentials(dct["appId"], dct["tenant"], dct["password"], subscription_id) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Deserialize credentials from config and cache. 
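A small illustration of how AzureCredentials is constructed and how the subscription ID guard behaves, based only on the signatures shown above; all credential values are placeholders::

    from sebs.azure.config import AzureCredentials

    creds = AzureCredentials.initialize(
        {"appId": "<app-id>", "tenant": "<tenant-id>", "password": "<secret>"},
        subscription_id=None,
    )
    assert not creds.has_subscription_id

    # Later assignments must agree with the cached value, otherwise the setter
    # reports an error and raises RuntimeError.
    creds.subscription_id = "<subscription-id>"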
+ + Loads Azure credentials from either the configuration dictionary + or environment variables, with subscription ID retrieved from cache. + + Args: + config: Configuration dictionary + cache: Cache instance for storing/retrieving cached values + handlers: Logging handlers for error reporting + + Returns: + AzureCredentials instance with loaded configuration. + Raises: + RuntimeError: If no valid credentials are found in config or environment. + """ cached_config = cache.get_config("azure") ret: AzureCredentials old_subscription_id: Optional[str] = None @@ -101,83 +193,182 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def serialize(self) -> dict: + """Serialize credentials to dictionary. + + We store only subscription ID to avoid unsecure storage of sensitive data. + + Returns: + Dictionary containing serialized credential data. + """ out = {"subscription_id": self.subscription_id} return out - def update_cache(self, cache_client: Cache): + def update_cache(self, cache_client: Cache) -> None: + """Update credentials in cache. + + Args: + cache_client: Cache instance to update + """ cache_client.update_config(val=self.serialize(), keys=["azure", "credentials"]) class AzureResources(Resources): + """Azure resource management for SeBS benchmarking. + + This class manages Azure cloud resources including storage accounts, + resource groups, and CosmosDB accounts. + + Attributes: + _resource_group: Name of the Azure resource group + _storage_accounts: List of storage accounts for function code + _data_storage_account: Storage account for benchmark data + _cosmosdb_account: CosmosDB account for NoSQL storage + """ + class Storage: - def __init__(self, account_name: str, connection_string: str): + """Azure Storage Account wrapper. + + Represents an Azure Storage Account with connection details + for use in serverless function deployment and data storage. + + Attributes: + account_name: Name of the Azure storage account + connection_string: Connection string for accessing the storage account + """ + + def __init__(self, account_name: str, connection_string: str) -> None: + """Initialize Azure Storage account. + + Args: + account_name: Name of the Azure storage account + connection_string: Connection string for storage access + """ super().__init__() self.account_name = account_name self.connection_string = connection_string - # FIXME: 3.7+ migration with future annotations @staticmethod def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": + """Create Storage instance from cached data. + + Args: + account_name: Name of the storage account + connection_string: Connection string for the account + + Returns: + New Storage instance with the provided details. + + Raises: + AssertionError: If connection string is empty. + """ assert connection_string, "Empty connection string for account {}".format(account_name) return AzureResources.Storage(account_name, connection_string) @staticmethod def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Create Storage instance from newly allocated account. + + Args: + account_name: Name of the storage account + cli_instance: Azure CLI instance for querying connection string + + Returns: + New Storage instance with queried connection string. 
+ """ connection_string = AzureResources.Storage.query_connection_string( account_name, cli_instance ) ret = AzureResources.Storage(account_name, connection_string) return ret - """ - Query the storage string in Azure using selected storage account. - """ - @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: + """Query connection string for storage account from Azure. + + Args: + account_name: Name of the storage account + cli_instance: Azure CLI instance for executing queries + + Returns: + Connection string for the storage account. + """ ret = cli_instance.execute( "az storage account show-connection-string --name {}".format(account_name) ) - ret = json.loads(ret.decode("utf-8")) - connection_string = ret["connectionString"] + ret_dct = json.loads(ret.decode("utf-8")) + connection_string = ret_dct["connectionString"] return connection_string def serialize(self) -> dict: + """Serialize storage account to dictionary. + + Returns: + Dictionary containing storage account information. + """ return vars(self) @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": + """Deserialize storage account from dictionary. + + Args: + obj: Dictionary containing storage account data + + Returns: + New Storage instance from dictionary data. + """ return AzureResources.Storage.from_cache(obj["account_name"], obj["connection_string"]) - # FIXME: 3.7 Python, future annotations def __init__( self, resource_group: Optional[str] = None, - storage_accounts: List["AzureResources.Storage"] = [], + storage_accounts: Optional[List["AzureResources.Storage"]] = None, data_storage_account: Optional["AzureResources.Storage"] = None, cosmosdb_account: Optional[CosmosDBAccount] = None, - ): + ) -> None: + """Initialize Azure resources. + + Args: + resource_group: Name of Azure resource group + storage_accounts: List of storage accounts for function code + data_storage_account: Storage account for benchmark data + cosmosdb_account: CosmosDB account for NoSQL operations + """ super().__init__(name="azure") self._resource_group = resource_group - self._storage_accounts = storage_accounts + self._storage_accounts = storage_accounts or [] self._data_storage_account = data_storage_account self._cosmosdb_account = cosmosdb_account - def set_region(self, region: str): + def set_region(self, region: str) -> None: + """Set the Azure region for resource allocation. + + Args: + region: Azure region name (e.g., 'westus2') + """ self._region = region @property def storage_accounts(self) -> List["AzureResources.Storage"]: + """Get list of storage accounts for function code. + + Returns: + List of Storage instances for function deployment. + """ return self._storage_accounts - """ - Locate resource group name in config. - If not found, then create a new resource group with uuid-based name. + def resource_group(self, cli_instance: AzureCLI) -> str: + """Get or create Azure resource group. - Requires Azure CLI instance in Docker. - """ + Locates existing resource group or creates a new one with UUID-based name. + The resource group is used to contain all SeBS-related Azure resources. - def resource_group(self, cli_instance: AzureCLI) -> str: + Args: + cli_instance: Azure CLI instance for resource operations + + Returns: + Name of the resource group. 
+ """ # Create resource group if not known if not self._resource_group: # Only underscore and alphanumeric characters are allowed @@ -199,7 +390,19 @@ def resource_group(self, cli_instance: AzureCLI) -> str: return self._resource_group def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: + """List SeBS resource groups in the current region. + Queries Azure for existing resource groups that match the SeBS naming pattern. + + Args: + cli_instance: Azure CLI instance for executing queries + + Returns: + List of resource group names matching SeBS pattern. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( "az group list --query " "\"[?starts_with(name,'sebs_resource_group_') && location=='{0}']\"".format( @@ -214,8 +417,19 @@ def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True): + def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True) -> None: + """Delete Azure resource group. + Removes the specified resource group and all contained resources. + + Args: + cli_instance: Azure CLI instance for executing deletion + name: Name of resource group to delete + wait: Whether to wait for deletion to complete + + Raises: + RuntimeError: If resource group deletion fails. + """ cmd = "az group delete -y --name {0}".format(name) if not wait: cmd += " --no-wait" @@ -225,15 +439,21 @@ def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = self.logging.error(ret.decode()) raise RuntimeError("Failed to delete the resource group!") - """ - Find or create a serverless CosmosDB account. - If not found, then create a new one based on the current resource ID. - Restriction: account names must be globally unique. + def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: + """Get or create CosmosDB account for NoSQL storage. - Requires Azure CLI instance in Docker. - """ + Finds existing CosmosDB account or creates a new serverless one. + Account names must be globally unique across Azure. - def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: + Args: + cli_instance: Azure CLI instance for CosmosDB operations + + Returns: + CosmosDBAccount instance for NoSQL operations. + + Raises: + RuntimeError: If CosmosDB account creation or parsing fails. + """ # Create resource group if not known if not self._cosmosdb_account: @@ -274,7 +494,19 @@ def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: return self._cosmosdb_account def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: + """List existing CosmosDB accounts in resource group. + + Queries for CosmosDB accounts matching the SeBS naming pattern. + + Args: + cli_instance: Azure CLI instance for executing queries + Returns: + Dictionary mapping account names to document endpoints. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( f" az cosmosdb list --resource-group {self._resource_group} " " --query \"[?starts_with(name,'sebs-cosmosdb-account')]\" " @@ -287,13 +519,18 @@ def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Retrieve or create storage account associated with benchmark data. 
- Last argument allows to override the resource - useful when handling - a single instance through multiple threads using different clients sharing the same cache. - """ - def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Get or create storage account for benchmark data. + + Retrieves existing or creates new storage account dedicated to storing + benchmark input/output data. This is separate from function code storage. + + Args: + cli_instance: Azure CLI instance for storage operations + + Returns: + Storage instance for benchmark data operations. + """ if not self._data_storage_account: # remove non-numerical and non-alphabetic characters @@ -304,7 +541,19 @@ def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storag return self._data_storage_account def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: + """List storage accounts in the resource group. + + Queries for all storage accounts within the managed resource group. + + Args: + cli_instance: Azure CLI instance for executing queries + Returns: + List of storage account names. + + Raises: + RuntimeError: If Azure CLI response cannot be parsed. + """ ret = cli_instance.execute( ("az storage account list --resource-group {0}").format( self.resource_group(cli_instance) @@ -318,11 +567,18 @@ def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Create a new function storage account and add to the list. - """ - def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """Create new storage account for function code. + + Creates a new storage account with a UUID-based name for storing + function code packages and adds it to the managed accounts list. + + Args: + cli_instance: Azure CLI instance for storage operations + + Returns: + New Storage instance for function code storage. + """ # Create account. Only alphanumeric characters are allowed # This one is used to store functions code - hence the name. @@ -333,15 +589,22 @@ def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage self._storage_accounts.append(account) return account - """ - Internal implementation of creating a new storage account. - The method does NOT update cache and - does NOT add the account to any resource collection. - """ - def _create_storage_account( self, cli_instance: AzureCLI, account_name: str ) -> "AzureResources.Storage": + """Internal method to create storage account. + + Creates a new Azure storage account with the specified name. + This one can be usedboth for data storage and function storage. + This method does NOT update cache or add to resource collections. + + Args: + cli_instance: Azure CLI instance for storage operations + account_name: Name for the new storage account + + Returns: + New Storage instance for the created account. + """ sku = "Standard_LRS" self.logging.info("Starting allocation of storage account {}.".format(account_name)) cli_instance.execute( @@ -358,20 +621,28 @@ def _create_storage_account( self.logging.info("Storage account {} created.".format(account_name)) return AzureResources.Storage.from_allocation(account_name, cli_instance) - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache_client: Cache) -> None: + """Update resource configuration in cache. 
- Update values: storage accounts, data storage accounts, resource groups. - """ + Persists current resource state including storage accounts, + data storage accounts, and resource groups to filesystem cache. - def update_cache(self, cache_client: Cache): + Args: + cache_client: Cache instance for storing configuration + """ super().update_cache(cache_client) cache_client.update_config(val=self.serialize(), keys=["azure", "resources"]) @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: dict) -> None: + """Initialize resources from dictionary data. + Populates resource instance with data from configuration dictionary. + + Args: + res: Resources instance to initialize + dct: Dictionary containing resource configuration + """ ret = cast(AzureResources, res) super(AzureResources, AzureResources).initialize(ret, dct) @@ -392,6 +663,11 @@ def initialize(res: Resources, dct: dict): ret._cosmosdb_account = CosmosDBAccount.deserialize(dct["cosmosdb_account"]) def serialize(self) -> dict: + """Serialize resources to dictionary. + + Returns: + Dictionary containing all resource configuration data. + """ out = super().serialize() if len(self._storage_accounts) > 0: out["storage_accounts"] = [x.serialize() for x in self._storage_accounts] @@ -405,7 +681,19 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Deserialize resources from config and cache. + + Loads Azure resources from cache if available, otherwise from configuration. + If no data is present, then we initialize an empty resources object. + + Args: + config: Configuration dictionary + cache: Cache instance for retrieving cached values + handlers: Logging handlers for error reporting + Returns: + AzureResources instance with loaded configuration. + """ cached_config = cache.get_config("azure") ret = AzureResources() # Load cached values @@ -426,30 +714,72 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AzureConfig(Config): - def __init__(self, credentials: AzureCredentials, resources: AzureResources): + """Complete Azure configuration for SeBS benchmarking. + + Combines Azure credentials and resources into a single configuration + object for managing Azure serverless function deployments. + + Attributes: + _credentials: Azure service principal credentials + _resources: Azure resource management instance + """ + + def __init__(self, credentials: AzureCredentials, resources: AzureResources) -> None: + """Initialize Azure configuration. + + Args: + credentials: Azure service principal credentials + resources: Azure resource management instance + """ super().__init__(name="azure") self._credentials = credentials self._resources = resources @property def credentials(self) -> AzureCredentials: + """Get Azure credentials. + + Returns: + AzureCredentials instance for authentication. + """ return self._credentials @property def resources(self) -> AzureResources: + """Get Azure resources manager. + + Returns: + AzureResources instance for resource management. + """ return self._resources - # FIXME: use future annotations (see sebs/faas/system) @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize configuration from dictionary data. 
+ + Args: + cfg: Config instance to initialize + dct: Dictionary containing configuration data + """ config = cast(AzureConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """Deserialize complete Azure configuration. + Creates AzureConfig instance from configuration dictionary and cache, + combining credentials and resources with region information. + + Args: + config: Configuration dictionary + cache: Cache instance for storing/retrieving cached values + handlers: Logging handlers for error reporting + + Returns: + AzureConfig instance with complete Azure configuration. + """ cached_config = cache.get_config("azure") - # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) resources = cast(AzureResources, AzureResources.deserialize(config, cache, handlers)) config_obj = AzureConfig(credentials, resources) @@ -465,19 +795,24 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.set_region(config_obj.region) return config_obj - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache: Cache) -> None: + """Update complete configuration in cache. - Update values: region. - """ + Persists region, credentials, and resources to filesystem cache. - def update_cache(self, cache: Cache): + Args: + cache: Cache instance for storing configuration + """ cache.update_config(val=self.region, keys=["azure", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """Serialize complete configuration to dictionary. + + Returns: + Dictionary containing all Azure configuration data. + """ out = { "name": "azure", "region": self._region, diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 52f8086b..088ea2c3 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -1,3 +1,31 @@ +"""Azure CosmosDB integration for SeBS NoSQL benchmarking. + +This module provides Azure CosmosDB integration for NoSQL benchmarks in the +SeBS benchmarking suite. It handles database and container management, +data operations, and resource lifecycle for NoSQL-based benchmarks. + +The module includes: + - BenchmarkResources: Dataclass for managing benchmark-specific resources + - CosmosDB: Main class for CosmosDB operations and management + +Example: + Basic usage for CosmosDB operations: + + :: + + from sebs.azure.cosmosdb import CosmosDB + + # Initialize CosmosDB with account + cosmosdb = CosmosDB(cache, resources, cosmosdb_account) + + # Set up benchmark database and containers + db_name = cosmosdb.benchmark_database("my-benchmark") + tables = cosmosdb.get_tables("my-benchmark") + + # Perform operations + credentials = cosmosdb.credentials() +""" + from dataclasses import dataclass from typing import cast, Dict, List, Optional, Tuple @@ -13,6 +41,16 @@ @dataclass class BenchmarkResources: + """Resource container for benchmark-specific CosmosDB resources. + + This dataclass holds the database and container names allocated + for a specific benchmark, along with the database client proxy. 
+ + Attributes: + database: Name of the CosmosDB database + containers: List of container names for the benchmark + database_client: CosmosDB database proxy (allocated dynamically) + """ database: str containers: List[str] @@ -20,23 +58,77 @@ class BenchmarkResources: database_client: Optional[DatabaseProxy] = None def serialize(self) -> dict: + """Serialize benchmark resources to dictionary. + + Returns: + Dictionary containing database and container names. + """ return {"database": self.database, "containers": self.containers} @staticmethod def deserialize(config: dict) -> "BenchmarkResources": + """Deserialize benchmark resources from dictionary. + + Args: + config: Dictionary containing resource configuration + + Returns: + BenchmarkResources instance with restored configuration. + """ return BenchmarkResources(database=config["database"], containers=config["containers"]) class CosmosDB(NoSQLStorage): + """Azure CosmosDB implementation for NoSQL storage in SeBS benchmarking. + + This class provides Azure CosmosDB integration for NoSQL benchmarks, + handling database and container management, data operations, and + resource lifecycle. It supports benchmark-specific database allocation + and container creation with proper caching and error handling. + + Azure CosmosDB has the following model: + - Each benchmark gets its own database + - Container names match benchmark table names directly + - No table mappings are required + - Partition keys are configured per container + + Attributes: + _cli_instance: Azure CLI instance for CosmosDB operations + _resource_group: Name of Azure resource group containing CosmosDB + _benchmark_resources: Dict mapping benchmark names to their resources + _cosmos_client: CosmosDB client for database operations + _cosmosdb_account: CosmosDB account configuration and credentials + """ + @staticmethod def typename() -> str: + """Get the storage type name. + + Returns: + String identifier for Azure CosmosDB storage type. + """ return "Azure.CosmosDB" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + String identifier for Azure deployment. + """ return "azure" - def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources, region: str): + def __init__( + self, cli: AzureCLI, cache_client: Cache, resources: AzureResources, region: str + ) -> None: + """Initialize CosmosDB storage handler. + + Args: + cli: Azure CLI instance for executing CosmosDB operations + cache_client: Cache instance for storing/retrieving configurations + resources: Azure resources manager for resource allocation + region: Azure region for resource placement + """ super().__init__(region, cache_client, resources) self._cli_instance = cli self._resource_group = resources.resource_group(self._cli_instance) @@ -45,15 +137,33 @@ def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources self._cosmos_client: Optional[CosmosClient] = None self._cosmosdb_account: Optional[CosmosDBAccount] = None - """ - Azure requires no table mappings: the name of container is the same as benchmark name. - """ - def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table mappings for benchmark. + + Azure requires no table mappings since container names match + benchmark table names directly. + + Args: + benchmark: Name of the benchmark + + Returns: + Empty dictionary as no mappings are needed for Azure CosmosDB. 
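A minimal round-trip example for the BenchmarkResources dataclass above, reusing the placeholder benchmark name from the module-level example and an illustrative container name::

    from sebs.azure.cosmosdb import BenchmarkResources

    resources = BenchmarkResources(database="my-benchmark", containers=["users"])
    cached = resources.serialize()            # {"database": ..., "containers": [...]}
    restored = BenchmarkResources.deserialize(cached)
    assert restored.containers == resources.containers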
+ """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual table name for a benchmark table. + + Validates that the table exists in the benchmark's containers + and returns the table name if found. + Args: + benchmark: Name of the benchmark + table: Logical table name to resolve + + Returns: + Actual table name if found, None if benchmark or table doesn't exist. + """ if benchmark not in self._benchmark_resources: return None @@ -63,7 +173,17 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve benchmark resources from cache. + Attempts to load cached benchmark resources including database + and container information from the filesystem cache. + + Args: + benchmark: Name of the benchmark to retrieve from cache + + Returns: + True if cache was found and loaded, False otherwise. + """ if benchmark in self._benchmark_resources: return True @@ -74,14 +194,28 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update benchmark resources in cache. + Persists current benchmark resources including database and + container information to the filesystem cache. + + Args: + benchmark: Name of the benchmark to cache + """ self.cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def cosmos_client(self) -> CosmosClient: + """Get or create CosmosDB client. + Initializes the CosmosDB client using the account credentials. + The client is cached after first initialization. + + Returns: + CosmosClient instance for database operations. + """ if self._cosmos_client is None: self._cosmosdb_account = cast(AzureResources, self._cloud_resources).cosmosdb_account( @@ -95,13 +229,39 @@ def cosmos_client(self) -> CosmosClient: return self._cosmos_client def has_tables(self, benchmark: str) -> bool: + """Check if benchmark has allocated tables. + + Args: + benchmark: Name of the benchmark to check + + Returns: + True if benchmark has allocated resources, False otherwise. + """ return benchmark in self._benchmark_resources def benchmark_database(self, benchmark: str) -> str: + """Get database name for benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Name of the CosmosDB database for the benchmark. + + Raises: + KeyError: If benchmark resources are not allocated. + """ return self._benchmark_resources[benchmark].database def credentials(self) -> Tuple[str, str, str]: + """Get CosmosDB account credentials. + Retrieves the account name, URL, and credential for CosmosDB access. + Initializes the CosmosDB account if not already done. + + Returns: + Tuple containing (account_name, url, credential) for CosmosDB access. + """ # An update of function that uses fully cached data will have # to initialize it separately # There were no prior actions that initialized this variable @@ -123,7 +283,22 @@ def write_to_table( data: dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): + ) -> None: + """Write data to CosmosDB container. + + Inserts data into the specified container with required key fields. + CosmosDB requires both a partition key and an 'id' field for documents. 
+ + Args: + benchmark: Name of the benchmark + table: Name of the container/table + data: Dictionary data to insert + primary_key: Tuple of (key_name, key_value) for partition key + secondary_key: Tuple of (key_name, key_value) for document id + + Raises: + AssertionError: If table name cannot be resolved or secondary_key is None. + """ res = self._benchmark_resources[benchmark] table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -143,20 +318,34 @@ def write_to_table( def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: + """Create CosmosDB container for benchmark table. + + Creates a new CosmosDB database and container for the benchmark if they + don't exist. Each benchmark gets its own database, and containers are + created within that database for each table. + + Args: + benchmark: Name of the benchmark + name: Name of the container/table to create + primary_key: Partition key field name for the container + _: Unused parameter for compatibility with base class + + Returns: + Name of the created container. + Raises: + CosmosResourceNotFoundError: If database or container operations fail. + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) if benchmark_resources is not None and name in benchmark_resources.containers: self.logging.info(f"Using cached CosmosDB container {name}") - """ - For some reason, creating the client is enough to verify existence of db/container. - We need to force the client to make some actions; that's why we call read. - """ - # Each benchmark receives its own CosmosDB database if benchmark_resources is None: - # Get or allocate database + # For some reason, creating the client is not enough to verify existence + # of db/container. + # We need to force the client to make some actions; that's why we call read. try: db_client = self.cosmos_client().get_database_client(benchmark) db_client.read() @@ -177,7 +366,6 @@ def create_table( ) try: - # verify it exists benchmark_resources.database_client.get_container_client(name).read() self.logging.info(f"Using existing CosmosDB container {name}") @@ -194,7 +382,29 @@ def create_table( return name def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + Name of the cleared table. + + Raises: + NotImplementedError: This operation is not yet implemented. + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + Name of the removed table. + + Raises: + NotImplementedError: This operation is not yet implemented. + """ raise NotImplementedError() diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c57..a95aff04 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,8 +1,20 @@ +"""Azure Function implementation for SeBS benchmarking. + +The AzureFunction class extends the base Function class and adds +one Azure-specific property: storage account associated with this function. +""" + from sebs.azure.config import AzureResources from sebs.faas.function import Function, FunctionConfig class AzureFunction(Function): + """Azure Function implementation. 
+ + Attributes: + function_storage: Azure Storage account used for function code storage + """ + def __init__( self, name: str, @@ -10,11 +22,25 @@ def __init__( code_hash: str, function_storage: AzureResources.Storage, cfg: FunctionConfig, - ): + ) -> None: + """Initialize Azure Function. + + Args: + name: Name of the Azure Function + benchmark: Name of the benchmark this function implements + code_hash: Hash of the function code for caching + function_storage: Azure Storage account for function code + cfg: Function configuration with memory, timeout, etc. + """ super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage def serialize(self) -> dict: + """Serialize function to dictionary. + + Returns: + Dictionary containing function data including Azure-specific storage. + """ return { **super().serialize(), "function_storage": self.function_storage.serialize(), @@ -22,6 +48,20 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + """Deserialize function from cached configuration. + + Recreates an AzureFunction instance from cached data including + function configuration, storage account, and triggers. + + Args: + cached_config: Dictionary containing cached function data + + Returns: + AzureFunction instance with restored configuration. + + Raises: + AssertionError: If unknown trigger type is encountered. + """ cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], diff --git a/sebs/azure/system_resources.py b/sebs/azure/system_resources.py index 0e3494d1..a009074c 100644 --- a/sebs/azure/system_resources.py +++ b/sebs/azure/system_resources.py @@ -1,25 +1,57 @@ +"""Azure system resources management for SeBS. + +This module provides Azure-specific system resource management including +storage accounts, CosmosDB instances, and Azure CLI management for +serverless benchmark execution. +""" + import json -from typing import cast, Optional +from typing import Optional, cast + +import docker -from sebs.config import SeBSConfig -from sebs.azure.config import AzureConfig from sebs.azure.blob_storage import BlobStorage -from sebs.azure.cosmosdb import CosmosDB from sebs.azure.cli import AzureCLI +from sebs.azure.config import AzureConfig +from sebs.azure.cosmosdb import CosmosDB from sebs.cache import Cache +from sebs.config import SeBSConfig from sebs.faas.resources import SystemResources from sebs.utils import LoggingHandlers -import docker - class AzureSystemResources(SystemResources): + """Azure system resources manager for SeBS benchmarking. + + Manages Azure-specific system resources including Blob Storage, + CosmosDB for NoSQL operations, and Azure CLI for resource management. + Handles authentication, resource initialization, and lifecycle management. + + Attributes: + _logging_handlers (LoggingHandlers): Logging configuration handlers + _storage (Optional[BlobStorage]): Azure Blob Storage instance + _nosql_storage (Optional[CosmosDB]): Azure CosmosDB instance + _cli_instance (Optional[AzureCLI]): Azure CLI Docker container instance + _system_config (SeBSConfig): SeBS system configuration + _cli_instance_stop (bool): Flag to control CLI instance lifecycle + """ + @staticmethod def typename() -> str: + """Get the system resources type name. + + Returns: + str: Type identifier for Azure system resources. + """ return "Azure.SystemResources" @property def config(self) -> AzureConfig: + """Get the Azure configuration. 
+ + Returns: + AzureConfig: Azure-specific configuration instance. + """ return cast(AzureConfig, self._config) def __init__( @@ -27,9 +59,18 @@ def __init__( system_config: SeBSConfig, config: AzureConfig, cache_client: Cache, - docker_client: docker.client, + docker_client: docker.client.DockerClient, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize Azure system resources. + + Args: + system_config (SeBSConfig): SeBS system configuration + config (AzureConfig): Azure-specific configuration + cache_client (Cache): Cache for storing resource information + docker_client (docker.client.DockerClient): Docker client for container management + logger_handlers (LoggingHandlers): Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers @@ -37,19 +78,20 @@ def __init__( self._nosql_storage: Optional[CosmosDB] = None self._cli_instance: Optional[AzureCLI] = None self._system_config = system_config + self._cli_instance_stop: bool = True - """ - Create wrapper object for Azure blob storage. - First ensure that storage account is created and connection string - is known. Then, create wrapper and create request number of buckets. + def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + """Get or create Azure Blob Storage instance. Requires Azure CLI instance in Docker to obtain storage account details. - :param replace_existing: when true, replace existing files in input buckets - :return: Azure storage instance - """ + Args: + replace_existing (Optional[bool]): When True, replace existing files in input buckets. + If None, defaults to False. - def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + Returns: + BlobStorage: Azure Blob Storage instance for benchmark data management. + """ if self._storage is None: self._storage = BlobStorage( self.config.region, @@ -64,14 +106,32 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: return self._storage def get_nosql_storage(self) -> CosmosDB: + """Get or create Azure CosmosDB instance. + + Creates and configures CosmosDB instance for NoSQL benchmark operations. + Handles authentication and database/container creation as needed. + + Requires Azure CLI instance in Docker. + + Returns: + CosmosDB: Azure CosmosDB instance for NoSQL operations. + """ if self._nosql_storage is None: self._nosql_storage = CosmosDB( self.cli_instance, self._cache_client, self.config.resources, self.config.region ) return self._nosql_storage - def _login_cli(self): + def _login_cli(self) -> None: + """Login to Azure CLI using service principal credentials. + Authenticates with Azure using the configured service principal + credentials and validates subscription access. + + Raises: + RuntimeError: If no valid subscription is found or multiple subscriptions exist. + AssertionError: If CLI instance is not initialized. + """ assert self._cli_instance is not None output = self._cli_instance.login( @@ -90,7 +150,14 @@ def _login_cli(self): @property def cli_instance(self) -> AzureCLI: + """Get or create Azure CLI instance. + + Creates and initializes Azure CLI Docker container if not already created. + Handles authentication automatically on first access. + Returns: + AzureCLI: Azure CLI instance for executing Azure commands. 
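A hedged sketch of the typical AzureSystemResources lifecycle, assuming an already constructed instance named 'system_resources' and, optionally, an externally managed CLI container named 'existing_cli'::

    storage = system_resources.get_storage(replace_existing=False)
    nosql = system_resources.get_nosql_storage()

    # Reuse an external CLI container instead of starting a new one.
    system_resources.initialize_cli(existing_cli, login=True)

    # Stops the CLI container only if this instance created it.
    system_resources.shutdown()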
+ """ if self._cli_instance is None: self._cli_instance = AzureCLI(self._system_config, self._docker_client) self._cli_instance_stop = True @@ -99,7 +166,17 @@ def cli_instance(self) -> AzureCLI: return self._cli_instance - def initialize_cli(self, cli: AzureCLI, login: bool = False): + def initialize_cli(self, cli: AzureCLI, login: bool = False) -> None: + """Initialize with existing Azure CLI instance. + + Allows using an external Azure CLI instance instead of creating a new one. + Useful for sharing CLI instances across multiple resource managers. + + Args: + cli (AzureCLI): External Azure CLI instance to use + login (bool): Whether to perform login with this CLI instance. + Defaults to False. + """ self._cli_instance = cli self._cli_instance_stop = False @@ -107,5 +184,11 @@ def initialize_cli(self, cli: AzureCLI, login: bool = False): self._login_cli() def shutdown(self) -> None: + """Shutdown Azure system resources. + + Cleans up Azure CLI Docker container and other resources. + Only shuts down CLI if it was created by this instance. + Does not terminate CLI instance attached to the class. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 4296a588..3e3aa586 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -1,3 +1,26 @@ +"""Azure Function triggers for SeBS benchmarking. + +This module provides Azure-specific trigger implementations for invoking +serverless functions. + +Example: + Basic usage for HTTP trigger: + + :: + + from sebs.azure.triggers import HTTPTrigger + + # Create HTTP trigger with function URL + trigger = HTTPTrigger(function_url, data_storage_account) + + # Synchronous invocation + result = trigger.sync_invoke(payload) + + # Asynchronous invocation + future = trigger.async_invoke(payload) + result = future.result() +""" + import concurrent.futures from typing import Any, Dict, Optional # noqa @@ -6,41 +29,125 @@ class AzureTrigger(Trigger): - def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None): + """Base class for Azure Function triggers. + + This abstract base class provides common functionality for Azure Function + triggers, including data storage account management for benchmark data + handling. + + FIXME: do we still need to know the data storage account? + + Attributes: + _data_storage_account: Azure storage account for benchmark data + """ + + def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None) -> None: + """Initialize Azure trigger. + + Args: + data_storage_account: Optional Azure storage account for data operations + """ super().__init__() self._data_storage_account = data_storage_account @property def data_storage_account(self) -> AzureResources.Storage: + """Get the data storage account. + + Returns: + Azure storage account for benchmark data. + + Raises: + AssertionError: If data storage account is not set. + """ assert self._data_storage_account return self._data_storage_account @data_storage_account.setter - def data_storage_account(self, data_storage_account: AzureResources.Storage): + def data_storage_account(self, data_storage_account: AzureResources.Storage) -> None: + """Set the data storage account. 
+ + Args: + data_storage_account: Azure storage account to set + """ self._data_storage_account = data_storage_account class HTTPTrigger(AzureTrigger): - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + """HTTP trigger for Azure Functions. + + This class implements HTTP-based invocation of Azure Functions, supporting + both synchronous and asynchronous execution patterns for benchmarking. + + Attributes: + url: HTTP endpoint URL for the Azure Function + """ + + def __init__( + self, url: str, data_storage_account: Optional[AzureResources.Storage] = None + ) -> None: + """Initialize HTTP trigger. + + Args: + url: HTTP endpoint URL for the Azure Function + data_storage_account: Optional Azure storage account for data operations + """ super().__init__(data_storage_account) self.url = url @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. + + Returns: + HTTP trigger type identifier. + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """Synchronously invoke Azure Function via HTTP. + + Sends HTTP request to the function endpoint and waits for response. + Args: + payload: Dictionary payload to send to the function + + Returns: + ExecutionResult containing response data and timing information. + """ return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke Azure Function via HTTP. + + Submits function invocation to a thread pool for parallel execution. + + Args: + payload: Dictionary payload to send to the function + + Returns: + Future object that can be used to retrieve the result. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize trigger to dictionary. + + Returns: + Dictionary containing trigger type and URL. + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize trigger from dictionary. + + Args: + obj: Dictionary containing trigger data + + Returns: + HTTPTrigger instance with restored configuration. + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820..0e295fee 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -1,3 +1,11 @@ +""" +Module for handling benchmarks in the Serverless Benchmarking Suite (SeBS). + +This module provides classes for benchmark configuration, code packaging, and execution. +It handles the preparation of code packages with dependencies for deployment to +various serverless platforms, including caching mechanisms to avoid redundant builds. +""" + import glob import hashlib import json @@ -23,9 +31,33 @@ class BenchmarkConfig: + """ + Configuration for a benchmark in the Serverless Benchmarking Suite. + + This class stores the configuration parameters for a benchmark, including + timeout, memory allocation, supported languages, and included modules. + + Attributes: + + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + languages: List of supported programming languages + modules: List of benchmark modules/features required + + """ + def __init__( self, timeout: int, memory: int, languages: List["Language"], modules: List[BenchmarkModule] ): + """ + Initialize a benchmark configuration. 
+ + Args: + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + languages: List of supported programming languages + modules: List of benchmark modules/features required + """ self._timeout = timeout self._memory = memory self._languages = languages @@ -33,31 +65,75 @@ def __init__( @property def timeout(self) -> int: + """ + Get the maximum execution time in seconds. + + Returns: + int: The timeout value + """ return self._timeout @timeout.setter def timeout(self, val: int): + """ + Set the maximum execution time in seconds. + + Args: + val: The new timeout value + """ self._timeout = val @property def memory(self) -> int: + """ + Get the memory allocation in MB. + + Returns: + int: The memory allocation + """ return self._memory @memory.setter def memory(self, val: int): + """ + Set the memory allocation in MB. + + Args: + val: The new memory allocation value + """ self._memory = val @property def languages(self) -> List["Language"]: + """ + Get the list of supported programming languages. + + Returns: + List[Language]: Supported programming languages + """ return self._languages @property def modules(self) -> List[BenchmarkModule]: + """ + Get the list of benchmark modules/features required. + + Returns: + List[BenchmarkModule]: Required benchmark modules + """ return self._modules - # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": + """ + Create a BenchmarkConfig instance from a JSON object. + + Args: + json_object: Dictionary containing benchmark configuration + + Returns: + BenchmarkConfig: A new instance with the deserialized data + """ from sebs.faas.function import Language return BenchmarkConfig( @@ -68,110 +144,288 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": ) -""" - Creates code package representing a benchmark with all code and assets - prepared and dependency install performed within Docker image corresponding - to the cloud deployment. - - The behavior of the class depends on cache state: - 1) First, if there's no cache entry, a code package is built. - 2) Otherwise, the hash of the entire benchmark is computed and compared - with the cached value. If changed, then rebuilt then benchmark. - 3) Otherwise, just return the path to cache code. -""" +class Benchmark(LoggingBase): + """ + Creates code package representing a benchmark with all code and assets. + + This class handles building, packaging, and deploying benchmark code for + serverless platforms. + This includes copying source files, adding deployment-specific wrappers, + adding deployment-specific dependencies, and installing application dependencies + within Docker images corresponding to the target cloud deployment. + Code packages are cached. + + The behavior of this class, particularly the `build` method, depends on the + state of the SeBS cache: + + 1. If no cache entry exists for the benchmark (for the current language, deployment, etc.), + a new code package is built. + 2. If a cache entry exists, the hash of the benchmark's source directory is computed + and compared with the hash of cached package. If they differ, or if an update is forced, + the package is rebuilt. + 3. Otherwise (cache entry exists and hash matches), the cached code package is used. 
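+
+    Example:
+        An illustrative build flow (a sketch, not taken verbatim from SeBS; it
+        assumes already-constructed experiment and system configs, a cache
+        client, a Docker client, and a platform-specific
+        ``deployment_build_step`` callable provided by the chosen FaaS backend):
+
+        ::
+
+            benchmark = Benchmark(
+                "110.dynamic-html", "aws", experiment_config,
+                system_config, output_dir, cache_client, docker_client,
+            )
+            benchmark.query_cache()
+            rebuilt, code_path, is_container, image_uri = benchmark.build(
+                deployment_build_step
+            )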
+ + Attributes: + benchmark: Name of the benchmark + benchmark_path: Path to the benchmark directory + benchmark_config: Configuration for the benchmark + code_package: Dictionary with code package information + functions: Dictionary of functions for this benchmark + code_location: Location of the code package + is_cached: Whether the benchmark is cached + is_cached_valid: Whether the cached benchmark is valid + code_size: Size of the code package in bytes + container_uri: URI of the container for container deployments + language: Programming language for the benchmark + language_name: Name of the programming language + language_version: Version of the programming language + has_input_processed: Whether input processing has been performed + uses_storage: Whether the benchmark uses cloud storage + uses_nosql: Whether the benchmark uses NoSQL databases + architecture: CPU architecture of the deployment target + container_deployment: Whether using container deployment + """ + + _hash_value: Optional[str] -class Benchmark(LoggingBase): @staticmethod def typename() -> str: + """ + Get the type name of this class. + + Returns: + str: The type name + """ return "Benchmark" @property - def benchmark(self): + def benchmark(self) -> str: + """ + Get the benchmark name. + + Returns: + str: Name of the benchmark + """ return self._benchmark @property - def benchmark_path(self): + def benchmark_path(self) -> str: + """ + Get the path to the benchmark directory. + + Returns: + str: Path to the benchmark directory + """ + assert self._benchmark_path is not None return self._benchmark_path @property def benchmark_config(self) -> BenchmarkConfig: + """ + Get the benchmark configuration. + + Returns: + BenchmarkConfig: Configuration for the benchmark + """ return self._benchmark_config @property - def code_package(self) -> dict: + def code_package(self) -> Dict[str, Any]: + """ + Get the cached code package information, if available. + This typically includes 'location' (relative to cache_dir), 'hash', and 'size'. + + Returns: + Dict[str, Any]: Dictionary with code package information + """ + assert self._code_package is not None return self._code_package @property def functions(self) -> Dict[str, Any]: + """ + Get the cached information about deployed functions associated + with this benchmark for the current deployment, keyed by function name. + + Returns: + Dict[str, Any]: Dictionary of functions + """ + assert self._functions is not None return self._functions @property - def code_location(self): + def code_location(self) -> str: + """ + Get the absolute path to the prepared code package. + If cached, it points to the location within the SeBS cache directory. + Otherwise, it points to the build output directory. + + Returns: + str: Path to the code package + """ if self.code_package: return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) else: + assert self._code_location is not None return self._code_location @property - def is_cached(self): + def is_cached(self) -> bool: + """ + Check if the benchmark is cached. + + Returns: + bool: True if cached, False otherwise + """ return self._is_cached @is_cached.setter def is_cached(self, val: bool): + """ + Set whether the benchmark is cached. + + Args: + val: True if cached, False otherwise + """ self._is_cached = val @property - def is_cached_valid(self): + def is_cached_valid(self) -> bool: + """ + True if a cached code package exists and its hash matches the current + benchmark source code hash. 
+ + Returns: + bool: True if valid, False otherwise + """ return self._is_cached_valid @is_cached_valid.setter def is_cached_valid(self, val: bool): + """ + Set whether the cached benchmark is valid. + + Args: + val: True if valid, False otherwise + """ self._is_cached_valid = val @property - def code_size(self): + def code_size(self) -> int: + """ + Get the size of the code package in bytes. + + Returns: + int: Size in bytes + """ return self._code_size @property def container_uri(self) -> str: + """ + Get the URI of the container for container deployments. + + Returns: + str: Container URI + + Raises: + AssertionError: If container URI is None + """ assert self._container_uri is not None return self._container_uri @property def language(self) -> "Language": + """ + Get the programming language for the benchmark. + + Returns: + Language: Programming language + """ return self._language @property def language_name(self) -> str: + """ + Get the name of the programming language, e.g., "python". + + Returns: + str: Name of the language + """ return self._language.value @property - def language_version(self): + def language_version(self) -> str: + """ + Get the version of the programming language, e.g. "3.8". + + Returns: + str: Version of the language + """ return self._language_version @property def has_input_processed(self) -> bool: + """ + Check if input processing has been performed. + + Returns: + bool: True if processed, False otherwise + """ return self._input_processed @property def uses_storage(self) -> bool: + """ + Check if the benchmark uses cloud storage. + + Returns: + bool: True if using storage, False otherwise + """ return self._uses_storage @property def uses_nosql(self) -> bool: + """ + Check if the benchmark uses NoSQL databases. + + Returns: + bool: True if using NoSQL, False otherwise + """ return self._uses_nosql @property def architecture(self) -> str: + """ + Get the CPU architecture of the deployment target. + + Returns: + str: Architecture name (e.g., 'x86_64', 'arm64') + """ return self._architecture @property - def container_deployment(self): + def container_deployment(self) -> bool: + """ + Check if using container deployment. + + Returns: + bool: True if using container deployment, False otherwise + """ return self._container_deployment @property # noqa: A003 - def hash(self): + def hash(self) -> str: + """ + Get the hash of the benchmark code. + + Computes an MD5 hash of the benchmark directory to determine if + the code has changed since the last build. + + Returns: + str: MD5 hash as a hexadecimal string + """ path = os.path.join(self.benchmark_path, self.language_name) self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @@ -179,7 +433,12 @@ def hash(self): @hash.setter # noqa: A003 def hash(self, val: str): """ + Set the hash of the benchmark code. + Used only for testing purposes. + + Args: + val: MD5 hash as a hexadecimal string """ self._hash_value = val @@ -193,6 +452,25 @@ def __init__( cache_client: Cache, docker_client: docker.client, ): + """ + Initialize a Benchmark instance. + + Sets up a benchmark for a specific deployment platform, including configuration, + language runtime, and caching. Loads the benchmark configuration from the JSON file + and validates the language support. 
+ + Args: + benchmark: Name of the benchmark + deployment_name: Name of the deployment platform (e.g., 'aws', 'azure') + config: Experiment configuration + system_config: SeBs system configuration + output_dir: Directory for output files + cache_client: Cache client for caching code packages + docker_client: Docker client for building dependencies + + Raises: + RuntimeError: If the benchmark is not found or doesn't support the language + """ super().__init__() self._benchmark = benchmark self._deployment_name = deployment_name @@ -201,9 +479,12 @@ def __init__( self._language_version = config.runtime.version self._architecture = self._experiment_config.architecture self._container_deployment = config.container_deployment - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") - if not self._benchmark_path: + + benchmark_path = find_benchmark(self.benchmark, "benchmarks") + if not benchmark_path: raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) + self._benchmark_path = benchmark_path + with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) @@ -215,7 +496,7 @@ def __init__( self._cache_client = cache_client self._docker_client = docker_client self._system_config = system_config - self._hash_value = None + self._code_location: Optional[str] = None self._output_dir = os.path.join( output_dir, f"{benchmark}_code", @@ -232,7 +513,6 @@ def __init__( self._is_cached_valid = False # Load input module - self._benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") self._benchmark_input_module = load_benchmark_input(self._benchmark_path) @@ -241,13 +521,23 @@ def __init__( self._uses_storage: bool = False self._uses_nosql: bool = False - """ + @staticmethod + def hash_directory(directory: str, deployment: str, language: str) -> str: + """ Compute MD5 hash of an entire directory. - """ - @staticmethod - def hash_directory(directory: str, deployment: str, language: str): + Calculates a hash of the benchmark source code by combining hashes of all + relevant files. This includes language-specific files, deployment wrappers, + and shared files like shell scripts and JSON configuration. + + Args: + directory: Path to the directory to hash + deployment: Name of the deployment platform + language: Programming language name + Returns: + str: MD5 hash as a hexadecimal string + """ hash_sum = hashlib.md5() FILES = { "python": ["*.py", "requirements.txt*"], @@ -272,10 +562,22 @@ def hash_directory(directory: str, deployment: str, language: str): return hash_sum.hexdigest() def serialize(self) -> dict: + """ + Serialize the benchmark to a dictionary. + + Returns: + dict: Dictionary containing size and hash of the benchmark code + """ return {"size": self.code_size, "hash": self.hash} - def query_cache(self): + def query_cache(self) -> None: + """ + Query the cache for existing benchmark code packages and functions. + Checks if there's a cached code package or container for this benchmark + and deployment combination. Updates the cache status fields based on + whether the cache exists and if it's still valid (hash matches). 
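+
+        Example (illustrative; assumes a ``Benchmark`` instance constructed as
+        in the class-level example):
+
+        ::
+
+            benchmark.query_cache()
+            if benchmark.is_cached and benchmark.is_cached_valid:
+                # Safe to reuse the cached package instead of rebuilding.
+                print(benchmark.code_location)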
+ """ if self.container_deployment: self._code_package = self._cache_client.get_container( deployment=self._deployment_name, @@ -312,7 +614,16 @@ def query_cache(self): self._is_cached = False self._is_cached_valid = False - def copy_code(self, output_dir): + def copy_code(self, output_dir: str) -> None: + """Copy benchmark source code to output directory. + + Copies language-specific source files and dependency files from the + benchmark directory to the output directory for deployment preparation. + Handles both Python requirements files and Node.js package.json files. + + Args: + output_dir: Destination directory for copied files + """ FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], @@ -326,7 +637,16 @@ def copy_code(self, output_dir): if os.path.exists(nodejs_package_json): shutil.copy2(nodejs_package_json, os.path.join(output_dir, "package.json")) - def add_benchmark_data(self, output_dir): + def add_benchmark_data(self, output_dir: str) -> None: + """Add benchmark-specific data and assets to output directory. + + Executes benchmark initialization scripts (init.sh) if present in + the benchmark directory. These scripts typically download or generate + additional data files required by the benchmark. + + Args: + output_dir: Directory where benchmark data should be added + """ cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false {architecture}" paths = [ self.benchmark_path, @@ -345,7 +665,18 @@ def add_benchmark_data(self, output_dir): stderr=subprocess.STDOUT, ) - def add_deployment_files(self, output_dir): + def add_deployment_files(self, output_dir: str) -> None: + """Add deployment-specific wrapper files to output directory. + + Copies platform-specific wrapper files (handlers, adapters) that + integrate the benchmark code with the target FaaS platform's + execution environment. + + Files are sourced from `benchmarks/wrappers/{deployment_name}/{language_name}/`. + + Args: + output_dir: Directory where deployment files should be added + """ handlers_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name ) @@ -358,7 +689,17 @@ def add_deployment_files(self, output_dir): for file in handlers: shutil.copy2(file, os.path.join(output_dir)) - def add_deployment_package_python(self, output_dir): + def add_deployment_package_python(self, output_dir: str) -> None: + """Add Python deployment packages to requirements file. + + Appends platform-specific Python packages and benchmark module + dependencies to the requirements.txt file for the deployment. + + Handles versioned requirements files (e.g., requirements.txt.3.8). + + Args: + output_dir: Directory containing the requirements file to modify + """ destination_file = f"requirements.txt.{self._language_version}" if not os.path.exists(os.path.join(output_dir, destination_file)): @@ -381,7 +722,16 @@ def add_deployment_package_python(self, output_dir): for package in module_packages[bench_module.value]: out.write(package) - def add_deployment_package_nodejs(self, output_dir): + def add_deployment_package_nodejs(self, output_dir: str) -> None: + """Add Node.js deployment packages to package.json. + + Modifies the package.json file to include platform-specific + Node.js dependencies required for deployment. + Handles versioned package.json files (e.g., package.json.12). 
+ + Args: + output_dir: Directory containing the package.json file to modify + """ # modify package.json packages = self._system_config.deployment_packages( self._deployment_name, self.language_name @@ -399,7 +749,18 @@ def add_deployment_package_nodejs(self, output_dir): with open(package_config, "w") as package_file: json.dump(package_json, package_file, indent=2) - def add_deployment_package(self, output_dir): + def add_deployment_package(self, output_dir: str) -> None: + """Add deployment packages based on programming language. + + Delegates to language-specific package addition methods to include + platform-specific dependencies in the deployment package. + + Args: + output_dir: Directory where deployment packages should be added + + Raises: + NotImplementedError: If the language is not supported + """ from sebs.faas.function import Language if self.language == Language.PYTHON: @@ -410,14 +771,45 @@ def add_deployment_package(self, output_dir): raise NotImplementedError @staticmethod - def directory_size(directory: str): + def directory_size(directory: str) -> int: + """Calculate total size of all files in a directory. + + Recursively calculates the total size in bytes of all files + within the specified directory and its subdirectories. + + Args: + directory: Path to the directory to measure + + Returns: + int: Total size in bytes of all files in the directory + """ from pathlib import Path root = Path(directory) sizes = [f.stat().st_size for f in root.glob("**/*") if f.is_file()] return sum(sizes) - def install_dependencies(self, output_dir): + def install_dependencies(self, output_dir: str) -> None: + """Install benchmark dependencies using Docker. + + Uses Docker containers to install language-specific dependencies + (pip packages for Python, npm packages for Node.js) in an environment + matching the target deployment platform. + Pulls a pre-built Docker image specific to the deployment, language, and + runtime version. Mounts the output directory into the container and runs + an installer script (`/sebs/installer.sh`) within the container. + Handles fallbacks to unversioned Docker images if versioned ones are not found. + + Supports copying files to/from Docker for environments where volume mounting + is problematic (e.g., CircleCI). + + Args: + output_dir: Directory containing the code package to build + + Raises: + RuntimeError: If Docker image pull fails + docker.errors.ContainerError: If dependency installation fails + """ # do we have docker image for this run and language? if "build" not in self._system_config.docker_image_types( self._deployment_name, self.language_name @@ -549,9 +941,9 @@ def ensure_image(name: str) -> None: ) # copy updated code with package data, stat = container.get_archive("/mnt/function") - with open(tar_archive, "wb") as f: + with open(tar_archive, "wb") as output_filef: for chunk in data: - f.write(chunk) + output_filef.write(chunk) with tarfile.open(tar_archive, "r") as tar: tar.extractall(output_dir) # docker packs the entire directory with basename function @@ -574,7 +966,15 @@ def ensure_image(name: str) -> None: self.logging.error(f"Docker mount volumes: {volumes}") raise e - def recalculate_code_size(self): + def recalculate_code_size(self) -> int: + """Recalculate and update the code package size. + + Measures the current size of the output directory and updates + the internal code size tracking. 
+ + Returns: + int: Updated code package size in bytes + """ self._code_size = Benchmark.directory_size(self._output_dir) return self._code_size @@ -584,6 +984,29 @@ def build( [str, str, str, str, str, bool, bool], Tuple[str, int, str] ], ) -> Tuple[bool, str, bool, str]: + """Build the complete benchmark deployment package. + + Orchestrates the entire build process for a benchmark, including: + - Code copying and dependency installation + - Adding benchmark data and deployment-specific files + - Running platform-specific build and packaging steps + (e.g., zipping, creating container image). + - Cache validation and reuse if possible + - Cache updates after successful build + + Args: + deployment_build_step: Platform-specific build function that takes + (output_dir, language, version, architecture, benchmark_name, + is_cached_valid, container_deployment) and returns + (code_location, code_size, container_uri) + + Returns: + Tuple containing: + - bool: Whether a new build was performed (False if cached) + - str: Path to the built code package + - bool: Whether this is a container deployment + - str: Container URI (empty string if not container deployment) + """ # Skip build if files are up to date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: @@ -656,7 +1079,24 @@ def build( def prepare_input( self, system_resources: SystemResources, size: str, replace_existing: bool = False - ): + ) -> Dict[str, str]: + """Prepare benchmark input data and allocate cloud resources. + + Locates the benchmark's input generator module (`input.py`), determines + storage requirements (object storage buckets, NoSQL tables), and invokes + the `generate_input` function from the module to create and upload + input data. Handles the setup of cloud storage buckets and NoSQL databases + required by the benchmark. + Updates the cache with storage details after successful preparation. + + Args: + system_resources: Cloud system resources manager + size: Benchmark workload size ('small', 'medium', 'large') + replace_existing: Whether to replace existing input data + + Returns: + Dict[str, str]: Input configuration for the benchmark function + """ """ Handle object storage buckets. @@ -700,6 +1140,7 @@ def prepare_input( # buckets = mod.buckets_count() # storage.allocate_buckets(self.benchmark, buckets) # Get JSON and upload data as required by benchmark + assert self._benchmark_data_path is not None input_config = self._benchmark_input_module.generate_input( self._benchmark_data_path, size, bucket, input, output, storage_func, nosql_func ) @@ -725,12 +1166,20 @@ def prepare_input( return input_config - """ + def code_package_modify(self, filename: str, data: bytes) -> None: + """Modify a file within the deployed code package. + + Updates a specific file within the code package without rebuilding + the entire package. Currently only supports ZIP archive packages. This is used in experiments that modify the size of input package. - This step allows to modify code package without going through the entire pipeline. 
- """ - def code_package_modify(self, filename: str, data: bytes): + Args: + filename: Name of the file to modify within the package + data: New content for the file as bytes + + Raises: + NotImplementedError: If the code package is not a ZIP archive + """ if self.code_package_is_archive(): self._update_zip(self.code_location, filename, data) @@ -745,19 +1194,50 @@ def code_package_modify(self, filename: str, data: bytes): """ def code_package_is_archive(self) -> bool: + """Check if the code package is an archive file. + + Determines whether the code package is stored as an archive file + (ZIP) rather than a directory structure. + + Returns: + bool: True if package is a ZIP archive, False if it's a directory + """ if os.path.isfile(self.code_location): extension = os.path.splitext(self.code_location)[1] return extension in [".zip"] return False def code_package_recompute_size(self) -> float: + """Recalculate the size of the code package file. + + Updates the internal size tracking after modifications to the + code package file. + + Returns: + float: Updated package size in bytes + """ bytes_size = os.path.getsize(self.code_location) self._code_size = bytes_size return bytes_size - # https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python @staticmethod - def _update_zip(zipname: str, filename: str, data: bytes): + def _update_zip(zipname: str, filename: str, data: bytes) -> None: + """Update a file within a ZIP archive. + + Replaces the content of a specific file within a ZIP archive + while preserving all other files and archive metadata. + + Creates a temporary zip file, copies all items from the original except + the target file (if it exists), and adds/replaces the target file with + new data. Finally, replaces the original zip with the temporary one. + Based on method from: + https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python + + Args: + zipname: Path to the ZIP archive to modify + filename: Name of the file to update within the archive + data: New content for the file as bytes + """ import zipfile import tempfile @@ -782,21 +1262,43 @@ def _update_zip(zipname: str, filename: str, data: bytes): zf.writestr(filename, data) -""" - The interface of `input` module of each benchmark. - Useful for static type hinting with mypy. -""" +class BenchmarkModuleInterface: + """Interface definition for benchmark input modules. + Useful for static type hinting with mypy and documentation. + This class defines the interface that benchmark input modules + must implement to provide input data generation, storage allocation, + and NoSQL database setup for benchmarks. + + All methods are static as they operate on benchmark data rather than + instance state. Benchmark modules are dynamically loaded from the + input.py file in each benchmark directory. + """ -class BenchmarkModuleInterface: @staticmethod @abstractmethod def buckets_count() -> Tuple[int, int]: + """Get the number of storage buckets required by the benchmark. + + Returns: + Tuple[int, int]: Number of (input_buckets, output_buckets) needed + """ pass @staticmethod @abstractmethod - def allocate_nosql() -> dict: + def allocate_nosql() -> Dict[str, Dict[str, str]]: + """Define NoSQL table requirements for the benchmark. 
+ + Returns: + Dict containing table definitions with primary and secondary keys: + { + 'table_name': { + 'primary_key': 'key_field_name', + 'secondary_key': 'optional_secondary_key_name' + } + } + """ pass @staticmethod @@ -812,10 +1314,43 @@ def generate_input( Callable[[str, str, dict, Tuple[str, str], Optional[Tuple[str, str]]], None] ], ) -> Dict[str, str]: + """Generate benchmark input data and configuration. + + Creates the input data files and configuration needed for benchmark + execution, uploading data to cloud storage and NoSQL databases as needed. + + Args: + data_dir: Directory containing benchmark data files + size: Benchmark workload size ('small', 'medium', 'large') + benchmarks_bucket: Name of the cloud storage bucket for data + input_paths: List of input data paths in cloud storage + output_paths: List of output data paths in cloud storage + upload_func: Function for uploading files to cloud storage + nosql_func: Function for writing data to NoSQL databases + + Returns: + Dict[str, str]: Input configuration dictionary for the benchmark + """ pass def load_benchmark_input(benchmark_path: str) -> BenchmarkModuleInterface: + """Dynamically load the input module for a benchmark. + + Loads the input.py file from the benchmark directory and returns it + as a module interface for generating benchmark input data. + + Args: + benchmark_path: Path to the benchmark directory containing input.py + + Returns: + BenchmarkModuleInterface: Loaded input module with benchmark-specific + input generation functions + + Raises: + FileNotFoundError: If input.py is not found in the benchmark directory + ImportError: If the input module cannot be loaded + """ # Look for input generator file in the directory containing benchmark import importlib.machinery import importlib.util diff --git a/sebs/cache.py b/sebs/cache.py index f690e747..b907d440 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -1,4 +1,26 @@ -# https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth +"""Caching system for SeBS (Serverless Benchmarking Suite). + +This module provides comprehensive caching functionality for the SeBS framework, +including configuration caching, code package management, function deployment +tracking, and storage resource management. + +The Cache class manages persistent storage of benchmark configurations, compiled +code packages, Docker containers, deployed functions, and cloud resource +configurations to optimize repeated benchmark executions and deployments. + +This class is essential for efficient benchmarking - we avoid regenerating +cloud resources, and we do not have to keep querying them every time +we start the benchmark. This is particularly important for cloud platforms +like Azure, where queries require a CLI tool running in a container and can +take a long time to resolve.
+ +Example: + Basic cache usage: + cache = Cache("/path/to/cache", docker_client) + config = cache.get_benchmark_config("aws", "110.dynamic-html") + cache.add_code_package("aws", benchmark_instance) +""" + import collections.abc import docker import datetime @@ -6,7 +28,7 @@ import os import shutil import threading -from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING # noqa +from typing import Any, Callable, Dict, List, Mapping, Optional, TYPE_CHECKING # noqa from sebs.utils import LoggingBase, serialize @@ -15,7 +37,22 @@ from sebs.faas.function import Function -def update(d, u): +def update(d: Dict[str, Any], u: Mapping[str, Any]) -> Dict[str, Any]: + """Recursively update nested dictionary with another dictionary. + + This function performs a deep merge of two dictionaries, merging nested + dictionary values rather than replacing them entirely. + + + Args: + d (Dict[str, Any]): The target dictionary to update. + u (Mapping[str, Any]): The source dictionary with updates. + + Returns: + Dict[str, Any]: The updated dictionary. + """ + + # https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth for k, v in u.items(): if isinstance(v, collections.abc.Mapping): d[k] = update(d.get(k, {}), v) @@ -24,8 +61,19 @@ def update(d, u): return d -def update_dict(cfg, val, keys): - def map_keys(obj, val, keys): +def update_dict(cfg: Dict[str, Any], val: Any, keys: List[str]) -> None: + """Update dictionary value at nested key path. + + Updates a nested dictionary by setting a value at a path specified + by a list of keys. Creates intermediate dictionaries as needed. + + Args: + cfg (Dict[str, Any]): The dictionary to update. + val (Any): The value to set at the key path. + keys (List[str]): List of keys forming the path to the target location. + """ + + def map_keys(obj: Dict[str, Any], val: Any, keys: List[str]) -> Dict[str, Any]: if len(keys): return {keys[0]: map_keys(obj, val, keys[1:])} else: @@ -35,14 +83,36 @@ def map_keys(obj, val, keys): class Cache(LoggingBase): - cached_config: Dict[str, str] = {} + """Persistent caching system for SeBS benchmark configurations and deployments. + + This class provides comprehensive caching functionality for SeBS benchmarks, + including configuration management, code package storage, function tracking, + and cloud resource management. It uses a file-based cache system with + thread-safe operations. + + Attributes: + cached_config (Dict[str, Any]): In-memory cache of cloud configurations. + config_updated (bool): Flag indicating if configuration needs to be saved. + cache_dir (str): Absolute path to the cache directory. + ignore_functions (bool): Flag to skip function caching operations. + ignore_storage (bool): Flag to skip storage resource caching. + docker_client (docker.DockerClient): Docker client for container operations. """ - Indicate that cloud offerings updated credentials or settings. - Thus we have to write down changes. - """ - config_updated = False - def __init__(self, cache_dir: str, docker_client: docker.DockerClient): + cached_config: Dict[str, Any] = {} + config_updated: bool = False + + def __init__(self, cache_dir: str, docker_client: docker.DockerClient) -> None: + """Initialize the Cache with directory and Docker client. + + Sets up the cache directory structure and loads existing configurations. + Creates the cache directory if it doesn't exist, otherwise loads + existing cached configurations. + + Args: + cache_dir (str): Path to the cache directory.
+ docker_client (docker.DockerClient): Docker client for container operations. + """ super().__init__() self.docker_client = docker_client self.cache_dir = os.path.abspath(cache_dir) @@ -56,36 +126,66 @@ def __init__(self, cache_dir: str, docker_client: docker.DockerClient): @staticmethod def typename() -> str: - return "Benchmark" + """Get the typename for this cache. + + Returns: + str: The cache type name. + """ + return "Cache" + + def load_config(self) -> None: + """Load cached cloud configurations from disk. - def load_config(self): + Reads configuration files for all supported cloud platforms from + the cache directory and loads them into memory. + """ with self._lock: for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) if os.path.exists(cloud_config_file): - self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) + with open(cloud_config_file, "r") as f: + self.cached_config[cloud] = json.load(f) + + def get_config(self, cloud: str) -> Optional[Dict[str, Any]]: + """Get cached configuration for a specific cloud provider. + + Args: + cloud (str): Cloud provider name (e.g., 'aws', 'azure', 'gcp'). - def get_config(self, cloud): + Returns: + Optional[Dict[str, Any]]: The cached configuration or None if not found. + """ return self.cached_config[cloud] if cloud in self.cached_config else None - """ - Update config values. Sets flag to save updated content in the end. - val: new value to store - keys: array of consecutive keys for multi-level dictionary - """ + def update_config(self, val: Any, keys: List[str]) -> None: + """Update configuration values at nested key path. - def update_config(self, val, keys): + Updates cached configuration by setting a value at the specified + nested key path. Sets the config_updated flag to ensure changes + are persisted to disk. + + Args: + val (Any): New value to store. + keys (List[str]): Array of consecutive keys for multi-level dictionary. + """ with self._lock: update_dict(self.cached_config, val, keys) self.config_updated = True - def lock(self): + def lock(self) -> None: + """Acquire the cache lock for thread-safe operations.""" self._lock.acquire() - def unlock(self): + def unlock(self) -> None: + """Release the cache lock.""" self._lock.release() - def shutdown(self): + def shutdown(self) -> None: + """Save cached configurations to disk if they were updated. + + Writes all updated cloud configurations back to their respective + JSON files in the cache directory. + """ if self.config_updated: for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: if cloud in self.cached_config: @@ -94,32 +194,24 @@ def shutdown(self): with open(cloud_config_file, "w") as out: json.dump(self.cached_config[cloud], out, indent=2) - """ - Access cached config of a benchmark. + def get_benchmark_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached configuration of a benchmark. - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: + Args: + deployment (str): Deployment platform ('aws', 'azure', 'gcp', 'openwhisk', 'local'). + benchmark (str): Benchmark name (e.g., '110.dynamic-html'). - :return: a JSON config or None when not exists - """ - - def get_benchmark_config(self, deployment: str, benchmark: str): + Returns: + Optional[Dict[str, Any]]: Benchmark configuration or None if not found. 
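+
+        Example (illustrative; assumes a ``Cache`` instance created as in the
+        module-level example):
+
+        ::
+
+            cfg = cache.get_benchmark_config("aws", "110.dynamic-html")
+            if cfg is not None:
+                cached_functions = cfg.get("python", {}).get("functions", {})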
+ """ benchmark_dir = os.path.join(self.cache_dir, benchmark) if os.path.exists(benchmark_dir): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - cfg = json.load(fp) - return cfg[deployment] if deployment in cfg else None - - """ - Access cached version of benchmark code. - - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: - - :return: a tuple of JSON config and absolute path to code or None - """ + config_file = os.path.join(benchmark_dir, "config.json") + if os.path.exists(config_file): + with open(config_file, "r") as fp: + cfg = json.load(fp) + return cfg[deployment] if deployment in cfg else None + return None def get_code_package( self, @@ -129,6 +221,18 @@ def get_code_package( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: + """Access cached version of benchmark code package. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + language_version (str): Language version. + architecture (str): Target architecture. + + Returns: + Optional[Dict[str, Any]]: Code package configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) key = f"{language_version}-{architecture}" @@ -145,6 +249,18 @@ def get_container( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: + """Access cached container configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + language_version (str): Language version. + architecture (str): Target architecture. + + Returns: + Optional[Dict[str, Any]]: Container configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) key = f"{language_version}-{architecture}" @@ -156,57 +272,114 @@ def get_container( def get_functions( self, deployment: str, benchmark: str, language: str ) -> Optional[Dict[str, Any]]: + """Get cached function configurations for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + language (str): Programming language. + + Returns: + Optional[Dict[str, Any]]: Function configurations or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) if cfg and language in cfg and not self.ignore_functions: return cfg[language]["functions"] else: return None - """ - Access cached storage config of a benchmark. - - :param deployment: allowed deployment clouds or local - :param benchmark: + def get_storage_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached storage configuration of a benchmark. - :return: a JSON config or None - """ + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. - def get_storage_config(self, deployment: str, benchmark: str): + Returns: + Optional[Dict[str, Any]]: Storage configuration or None if not found. + """ return self._get_resource_config(deployment, benchmark, "storage") - def get_nosql_config(self, deployment: str, benchmark: str): + def get_nosql_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """Access cached NoSQL configuration of a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + + Returns: + Optional[Dict[str, Any]]: NoSQL configuration or None if not found. 
+ """ return self._get_resource_config(deployment, benchmark, "nosql") - def _get_resource_config(self, deployment: str, benchmark: str, resource: str): + def _get_resource_config( + self, deployment: str, benchmark: str, resource: str + ) -> Optional[Dict[str, Any]]: + """Helper to retrieve a specific type of resource + configuration from the benchmark's cache. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + resource (str): Resource type ('storage' or 'nosql'). + + Returns: + Optional[Dict[str, Any]]: Resource configuration or None if not found. + """ cfg = self.get_benchmark_config(deployment, benchmark) return cfg[resource] if cfg and resource in cfg and not self.ignore_storage else None - def update_storage(self, deployment: str, benchmark: str, config: dict): + def update_storage(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: + """Update cached storage configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + config (Dict[str, Any]): Storage configuration to cache. + """ if self.ignore_storage: return self._update_resources(deployment, benchmark, "storage", config) - def update_nosql(self, deployment: str, benchmark: str, config: dict): + def update_nosql(self, deployment: str, benchmark: str, config: Dict[str, Any]) -> None: + """Update cached NoSQL configuration for a benchmark. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + config (Dict[str, Any]): NoSQL configuration to cache. + """ if self.ignore_storage: return self._update_resources(deployment, benchmark, "nosql", config) - def _update_resources(self, deployment: str, benchmark: str, resource: str, config: dict): - if self.ignore_storage: - return + def _update_resources( + self, deployment: str, benchmark: str, resource: str, config: Dict[str, Any] + ) -> None: + """Internal helper to update a resource configuration (storage or NoSQL) in the cache. + + Since the benchmark data is prepared before creating and caching a function, + it ensures the benchmark's cache directory exists and updates the `config.json` file + within it. + + Args: + deployment (str): Deployment platform name. + benchmark (str): Benchmark name. + resource (str): Resource type ('storage' or 'nosql'). + config (Dict[str, Any]): Resource configuration to cache. """ - We are now preparing benchmark data before caching function. - Thus, we have to take over a situation where the cache directory does not exist. - """ + if self.ignore_storage: + return benchmark_dir = os.path.join(self.cache_dir, benchmark) os.makedirs(benchmark_dir, exist_ok=True) with self._lock: - if os.path.exists(os.path.join(benchmark_dir, "config.json")): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: + config_file = os.path.join(benchmark_dir, "config.json") + if os.path.exists(config_file): + with open(config_file, "r") as fp: cached_config = json.load(fp) else: cached_config = {} @@ -216,14 +389,28 @@ def _update_resources(self, deployment: str, benchmark: str, resource: str, conf else: cached_config[deployment] = {resource: config} - with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: + with open(config_file, "w") as fp: json.dump(cached_config, fp, indent=2) def add_code_package( self, deployment_name: str, code_package: "Benchmark", - ): + ) -> None: + """Add a new code package to the cache. + + Copies the code package (directory or zip file) into the cache structure. 
+ Records metadata (hash, size, location, timestamps, image details if container) + in the benchmark's `config.json` within the cache. + Handles both package and container deployments. + + Args: + deployment_name (str): Name of the deployment platform. + code_package (Benchmark): The benchmark code package to cache. + + Raises: + RuntimeError: If cached application already exists for the deployment. + """ with self._lock: language = code_package.language_name language_version = code_package.language_version @@ -331,7 +518,17 @@ def update_code_package( self, deployment_name: str, code_package: "Benchmark", - ): + ) -> None: + """Update an existing code package in the cache. + + Copies the new code package version over the old one. Updates metadata + (hash, size, modification timestamp, image details if container) in the + benchmark's `config.json`. If the cached package doesn't exist, adds it as a new package. + + Args: + deployment_name (str): Name of the deployment platform. + code_package (Benchmark): The benchmark code package to update. + """ with self._lock: language = code_package.language_name language_version = code_package.language_version @@ -392,24 +589,27 @@ def update_code_package( else: self.add_code_package(deployment_name, code_package) - """ - Add new function to cache. - - :param deployment: - :param benchmark: - :param language: - :param code_package: Path to directory/ZIP with code. - :param language_config: Configuration of language and code. - :param storage_config: Configuration of storage buckets. - """ - def add_function( self, deployment_name: str, language_name: str, code_package: "Benchmark", function: "Function", - ): + ) -> None: + """Add new function to cache. + + Caches a deployed function configuration for a benchmark. Links the + function to its corresponding code package. + + Args: + deployment_name (str): Name of the deployment platform. + language_name (str): Programming language name. + code_package (Benchmark): The benchmark code package. + function (Function): The deployed function to cache. + + Raises: + RuntimeError: If code package doesn't exist in cache. + """ if self.ignore_functions: return with self._lock: @@ -436,7 +636,18 @@ def add_function( "Can't cache function {} for a non-existing code package!".format(function.name) ) - def update_function(self, function: "Function"): + def update_function(self, function: "Function") -> None: + """Update an existing function in the cache. + + Updates cached function configuration with new metadata. Searches + across all deployments and languages to find the function by name. + + Args: + function (Function): The function with updated configuration. + + Raises: + RuntimeError: If function's code package doesn't exist in cache. + """ if self.ignore_functions: return with self._lock: diff --git a/sebs/config.py b/sebs/config.py index c3030ea0..5c91a221 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -1,3 +1,14 @@ +"""Configuration management for SeBS (Serverless Benchmarking Suite). + +This module provides configuration management functionality for the SeBS framework, +including system configuration loading, Docker image management, and deployment +setting retrieval from the systems.json configuration file. + +The SeBSConfig class serves as the central configuration manager that provides +access to platform-specific settings, language configurations, and deployment +options across different cloud providers and local deployments. 
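+
+Example:
+    Basic usage (an illustrative sketch; the argument values are examples only):
+
+    ::
+
+        config = SeBSConfig()
+        repository = config.docker_repository()
+        versions = config.supported_language_versions("aws", "python", "x64")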
+""" + import json from typing import Dict, List, Optional @@ -5,23 +16,68 @@ class SeBSConfig: - def __init__(self): + """Central configuration manager for SeBS framework. + + This class manages all configuration settings for the SeBS benchmarking suite, + including system configurations, Docker settings, deployment options, and + platform-specific parameters. It loads configuration from systems.json and + provides convenient access methods for various configuration aspects. + + Attributes: + _system_config (Dict): The loaded system configuration from systems.json. + _image_tag_prefix (str): Custom prefix for Docker image tags. + """ + + def __init__(self) -> None: + """Initialize SeBSConfig by loading system configuration. + + Loads the systems.json configuration file and initializes the image tag prefix. + + Raises: + FileNotFoundError: If systems.json configuration file is not found. + json.JSONDecodeError: If systems.json contains invalid JSON. + """ with open(project_absolute_path("config", "systems.json"), "r") as cfg: self._system_config = json.load(cfg) self._image_tag_prefix = "" @property def image_tag_prefix(self) -> str: + """Get the current Docker image tag prefix. + + Returns: + str: The current image tag prefix. + """ return self._image_tag_prefix @image_tag_prefix.setter - def image_tag_prefix(self, tag: str): + def image_tag_prefix(self, tag: str) -> None: + """Set the Docker image tag prefix. + + Args: + tag (str): The prefix to use for Docker image tags. + """ self._image_tag_prefix = tag def docker_repository(self) -> str: + """Get the Docker repository name from configuration. + + Returns: + str: The Docker repository name configured in systems.json. + """ return self._system_config["general"]["docker_repository"] def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: + """Get deployment packages for a specific deployment and language. + These are packages added by SeBS to the benchmark's list of dependencies. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + Dict[str, str]: Dictionary mapping package names to their versions. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "packages" ] @@ -29,42 +85,119 @@ def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[ def deployment_module_packages( self, deployment_name: str, language_name: str ) -> Dict[str, str]: + """Get deployment module packages for a specific deployment and language, e.g., + packages specific to object or NoSQL storage. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + Dict[str, str]: Dictionary mapping module package names to their versions. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "module_packages" ] def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: + """Get deployment files list for a specific deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + List[str]: List of required deployment files. 
+ """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "files" ] def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: + """Get available Docker image types for a deployment and language. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + List[str]: List of available Docker image types. + """ return self._system_config[deployment_name]["languages"][language_name]["images"] def supported_language_versions( self, deployment_name: str, language_name: str, architecture: str ) -> List[str]: + """Get supported language versions for a deployment, language, and architecture. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + architecture (str): Target architecture (e.g., 'x64', 'arm64'). + + Returns: + List[str]: List of supported language versions. + """ languages = self._system_config.get(deployment_name, {}).get("languages", {}) base_images = languages.get(language_name, {}).get("base_images", {}) return list(base_images.get(architecture, {}).keys()) def supported_architecture(self, deployment_name: str) -> List[str]: + """Get supported architectures for a deployment platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + List[str]: List of supported architectures (e.g., ['x64', 'arm64']). + """ return self._system_config[deployment_name]["architecture"] def supported_package_deployment(self, deployment_name: str) -> bool: + """Check if package-based deployment is supported for a platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + bool: True if package deployment is supported, False otherwise. + """ return "package" in self._system_config[deployment_name]["deployments"] def supported_container_deployment(self, deployment_name: str) -> bool: + """Check if container-based deployment is supported for a platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + + Returns: + bool: True if container deployment is supported, False otherwise. + """ return "container" in self._system_config[deployment_name]["deployments"] def benchmark_base_images( self, deployment_name: str, language_name: str, architecture: str ) -> Dict[str, str]: + """Get base Docker images for benchmarks on a specific platform. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + architecture (str): Target architecture (e.g., 'x64', 'arm64'). + + Returns: + Dict[str, str]: Dictionary mapping language versions to base image names. + """ return self._system_config[deployment_name]["languages"][language_name]["base_images"][ architecture ] def version(self) -> str: + """Get the SeBS framework version. + + Returns: + str: The SeBS version string, or 'unknown' if not configured. + """ return self._system_config["general"].get("SeBS_version", "unknown") def benchmark_image_name( @@ -76,7 +209,19 @@ def benchmark_image_name( architecture: str, registry: Optional[str] = None, ) -> str: + """Generate full Docker image name for a benchmark. + + Args: + system (str): Deployment system name (e.g., 'aws', 'azure'). 
+ benchmark (str): Benchmark name (e.g., '110.dynamic-html'). + language_name (str): Programming language name (e.g., 'python'). + language_version (str): Language version (e.g., '3.8'). + architecture (str): Target architecture (e.g., 'x64'). + registry (Optional[str]): Docker registry URL. If None, uses default repository. + Returns: + str: Complete Docker image name including registry and tag. + """ tag = self.benchmark_image_tag( system, benchmark, language_name, language_version, architecture ) @@ -94,6 +239,24 @@ def benchmark_image_tag( language_version: str, architecture: str, ) -> str: + """Generate Docker image tag for a benchmark container. + + Creates a standardized tag format that includes system, benchmark, language, + version, architecture, optional prefix, and SeBS version. + + Format: function.{system}.{benchmark}.{language_name}-{language_version}- + {architecture}[-{image_prefix}]-{sebs_version} + + Args: + system (str): Deployment system name (e.g., 'aws', 'azure'). + benchmark (str): Benchmark name (e.g., '110.dynamic-html'). + language_name (str): Programming language name (e.g., 'python'). + language_version (str): Language version (e.g., '3.8'). + architecture (str): Target architecture (e.g., 'x64'). + + Returns: + str: Generated Docker image tag. + """ tag = f"function.{system}.{benchmark}.{language_name}-{language_version}-{architecture}" if self.image_tag_prefix: tag = f"{tag}-{self.image_tag_prefix}" @@ -102,4 +265,13 @@ def benchmark_image_tag( return tag def username(self, deployment_name: str, language_name: str) -> str: + """Get the username for a specific deployment and language configuration. + + Args: + deployment_name (str): Name of the deployment platform (e.g., 'aws', 'azure'). + language_name (str): Programming language name (e.g., 'python', 'nodejs'). + + Returns: + str: The username configured for the deployment and language combination. + """ return self._system_config[deployment_name]["languages"][language_name]["username"] diff --git a/sebs/experiments/__init__.py b/sebs/experiments/__init__.py index ff820d40..64e82c00 100644 --- a/sebs/experiments/__init__.py +++ b/sebs/experiments/__init__.py @@ -1,3 +1,18 @@ +"""Experiment implementations for serverless benchmarking. + +This package provides a collection of experiment implementations for +measuring various aspects of serverless function performance: + +- PerfCost: Measures performance and cost characteristics +- NetworkPingPong: Measures network latency and throughput +- EvictionModel: Measures container eviction patterns +- InvocationOverhead: Measures function invocation overhead + +Each experiment is designed to evaluate specific aspects of serverless +platforms, enabling detailed comparison between different providers, +configurations, and workloads. +""" + from .result import Result as ExperimentResult # noqa from .experiment import Experiment # noqa from .perf_cost import PerfCost # noqa diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 26aea9f2..6ba4ce4f 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -1,10 +1,42 @@ +"""Configuration management for benchmark experiments. 
+ +This module provides the configuration class for benchmark experiments, +handling settings such as: +- Runtime environment (language, version) +- Architecture (x64, arm64) +- Deployment type (container, package) +- Code and storage update flags +- Experiment-specific settings + +The Config class handles serialization and deserialization of experiment +configurations, allowing them to be loaded from and saved to configuration files. +""" + from typing import Dict from sebs.faas.function import Runtime class Config: + """Configuration class for benchmark experiments. + + This class manages the configuration settings for benchmark experiments, + including runtime environment, architecture, deployment type, and + experiment-specific settings. + + Attributes: + _update_code: Whether to update function code + _update_storage: Whether to update storage resources + _container_deployment: Whether to use container-based deployment + _download_results: Whether to download experiment results + _architecture: CPU architecture (e.g., "x64", "arm64") + _flags: Dictionary of boolean flags for custom settings + _experiment_configs: Dictionary of experiment-specific settings + _runtime: Runtime environment (language and version) + """ + def __init__(self): + """Initialize a new experiment configuration with default values.""" self._update_code: bool = False self._update_storage: bool = False self._container_deployment: bool = False @@ -16,35 +48,95 @@ def __init__(self): @property def update_code(self) -> bool: + """Get whether to update function code. + + Returns: + True if function code should be updated, False otherwise + """ return self._update_code @update_code.setter def update_code(self, val: bool): + """Set whether to update function code. + + Args: + val: True if function code should be updated, False otherwise + """ self._update_code = val @property def update_storage(self) -> bool: + """Get whether to update storage resources. + + Returns: + True if storage resources should be updated, False otherwise + """ return self._update_storage def check_flag(self, key: str) -> bool: + """Check if a specific experiment flag is set. + + Currently it is only used to let benchmark know that Docker + volumes are disabled (e.g., in CircleCI environment). + + Args: + key: Name of the flag to check + + Returns: + Value of the flag, or False if the flag is not set + """ return False if key not in self._flags else self._flags[key] @property def runtime(self) -> Runtime: + """Get the runtime environment. + + Returns: + Runtime environment (language and version) + """ return self._runtime @property def architecture(self) -> str: + """Get the CPU architecture. + + Returns: + CPU architecture (e.g., "x64", "arm64") + """ return self._architecture @property def container_deployment(self) -> bool: + """Get whether to use container-based deployment. + + Returns: + True if container-based deployment should be used, False otherwise + """ return self._container_deployment def experiment_settings(self, name: str) -> dict: + """Get settings for a specific experiment. + + Args: + name: Name of the experiment + + Returns: + Dictionary of experiment-specific settings + + Raises: + KeyError: If the experiment name is not found in the configuration + """ return self._experiment_configs[name] def serialize(self) -> dict: + """Serialize the configuration to a dictionary. + + This method converts the configuration object to a dictionary + that can be saved to a file or passed to other components. 
+ + Returns: + Dictionary representation of the configuration + """ out = { "update_code": self._update_code, "update_storage": self._update_storage, @@ -60,7 +152,22 @@ def serialize(self) -> dict: # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(config: dict) -> "Config": + """Deserialize a configuration from a dictionary. + + This method creates a new configuration object from a dictionary + representation, which may have been loaded from a file or passed + from another component. + + Args: + config: Dictionary representation of the configuration + + Returns: + A new configuration object with settings from the dictionary + Note: + This method requires Python 3.7+ for proper type annotations. + The string type annotation is a forward reference to the Config class. + """ cfg = Config() cfg._update_code = config["update_code"] cfg._update_storage = config["update_storage"] @@ -70,6 +177,7 @@ def deserialize(config: dict) -> "Config": cfg._flags = config["flags"] if "flags" in config else {} cfg._architecture = config["architecture"] + # Import experiment types here to avoid circular import from sebs.experiments import ( NetworkPingPong, PerfCost, @@ -77,6 +185,7 @@ def deserialize(config: dict) -> "Config": EvictionModel, ) + # Load experiment-specific settings if present for exp in [NetworkPingPong, PerfCost, InvocationOverhead, EvictionModel]: if exp.name() in config: cfg._experiment_configs[exp.name()] = config[exp.name()] diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 86576f11..ca25cae3 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -1,17 +1,53 @@ -from typing import List +"""Environment management for experiment execution. -from sebs.utils import execute +This module provides the ExperimentEnvironment class for managing CPU settings +and system configuration during benchmark experiments. This is useful for local, +Docker-based executions. It handles: -""" - Assumes that all cores are online in the beginning. - TODO: use lscpu to discover online cores +- CPU frequency scaling and governor management +- Hyperthreading control (enable/disable) +- CPU boost control +- Memory management (page cache dropping) +- Intel CPU-specific optimizations + +Currently supports only Intel CPUs with the intel_pstate driver. - Currently supports only Intel CPUs with intel_pstate driver. +Note: + This module assumes that all CPU cores are online at initialization. + Future versions should use lscpu to discover online cores dynamically. """ +from typing import Dict, List + +from sebs.utils import execute + class ExperimentEnvironment: - def __init__(self): + """Environment management for benchmark experiments. + + This class provides methods to control CPU settings, memory management, + and other system configurations that can affect benchmark results. + It focuses on creating a stable, reproducible environment for experiments. + + Attributes: + _cpu_mapping: Dictionary mapping physical cores to logical cores + _vendor: CPU vendor identifier (currently only "intel" supported) + _governor: CPU frequency scaling governor (e.g., "intel_pstate") + _prev_boost_status: Previous boost status for restoration + _prev_min_freq: Previous minimum frequency setting for restoration + """ + + def __init__(self) -> None: + """Initialize the experiment environment. + + Discovers CPU topology, checks vendor compatibility, and verifies + the CPU frequency scaling driver. 
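As a usage illustration for the (de)serialization described above, the sketch below builds an experiment Config from a plain dictionary. The key set is inferred from the attributes listed earlier; treat it as an approximation of the schema rather than the authoritative format.

```python
from sebs.experiments.config import Config

# Approximate configuration dictionary; keys mirror the attributes documented
# above, but the exact schema (e.g., of "runtime") may differ in practice.
experiment_config = {
    "update_code": False,
    "update_storage": False,
    "download_results": False,
    "container_deployment": False,
    "architecture": "x64",
    "flags": {},
    "runtime": {"language": "python", "version": "3.8"},
    "perf-cost": {"benchmark": "110.dynamic-html", "repetitions": 50},
}

cfg = Config.deserialize(experiment_config)
assert cfg.architecture == "x64"
assert cfg.experiment_settings("perf-cost")["repetitions"] == 50
```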
Currently only supports Intel CPUs + with the intel_pstate driver. + + Raises: + NotImplementedError: If CPU vendor is not Intel or scaling driver + is not intel_pstate + """ # find CPU mapping ret = execute('cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True) # skip empty line at the end @@ -28,7 +64,7 @@ def __init__(self): for cpu_id in range(1, number_of_cores) ] - self._cpu_mapping = {} + self._cpu_mapping: Dict[int, List[Dict[str, int]]] = {} # iterate over every two elements i na list for logical_core, physical_core in zip(*[iter(mapping)] * 2): core_description = { @@ -42,7 +78,7 @@ def __init__(self): vendor = execute('lscpu | grep -e "Vendor ID"', shell=True).split(";")[1] if vendor == "GenuineIntel": - self._vendor = "intel" + self._vendor: str = "intel" else: raise NotImplementedError() @@ -50,11 +86,17 @@ def __init__(self): scaling_governor_path = "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" governor = execute("cat {path}".format(path=scaling_governor_path)) if governor == "intel_pstate": - self._governor = governor + self._governor: str = governor else: raise NotImplementedError() - def write_cpu_status(self, cores: List[int], status: int): + def write_cpu_status(self, cores: List[int], status: int) -> None: + """Write CPU online status for specified cores. + + Args: + cores: List of physical core IDs to modify + status: Status to set (0 for offline, 1 for online) + """ cpu_status_path = "/sys/devices/system/cpu/cpu{cpu_id}/online" for core in cores: @@ -66,13 +108,31 @@ def write_cpu_status(self, cores: List[int], status: int): shell=True, ) - def disable_hyperthreading(self, cores: List[int]): + def disable_hyperthreading(self, cores: List[int]) -> None: + """Disable hyperthreading for specified cores. + + Args: + cores: List of physical core IDs to disable hyperthreading for + """ self.write_cpu_status(cores, 0) - def enable_hyperthreading(self, cores: List[int]): + def enable_hyperthreading(self, cores: List[int]) -> None: + """Enable hyperthreading for specified cores. + + Args: + cores: List of physical core IDs to enable hyperthreading for + """ self.write_cpu_status(cores, 1) - def disable_boost(self, cores: List[int]): + def disable_boost(self, cores: List[int]) -> None: + """Disable CPU boost (turbo) for specified cores. + + Args: + cores: List of physical core IDs to disable boost for + + Raises: + NotImplementedError: If CPU governor is not intel_pstate + """ if self._governor == "intel_pstate": boost_path = "/sys/devices/system/cpu/intel_pstate" self._prev_boost_status = execute("cat " + boost_path) @@ -80,7 +140,17 @@ def disable_boost(self, cores: List[int]): else: raise NotImplementedError() - def enable_boost(self, cores: List[int]): + def enable_boost(self, cores: List[int]) -> None: + """Enable CPU boost (turbo) for specified cores. + + Restores the previous boost status that was saved when boost was disabled. + + Args: + cores: List of physical core IDs to enable boost for + + Raises: + NotImplementedError: If CPU governor is not intel_pstate + """ if self._governor == "intel_pstate": boost_path = "/sys/devices/system/cpu/intel_pstate" execute( @@ -91,25 +161,63 @@ def enable_boost(self, cores: List[int]): else: raise NotImplementedError() - def drop_page_cache(self): + def drop_page_cache(self) -> None: + """Drop system page cache to ensure clean memory state. + + This method clears the page cache to prevent cached data from + affecting benchmark measurements. 
+ """ execute("echo 3 | sudo tee /proc/sys/vm/drop_caches") - def set_frequency(self, max_freq: int): + def set_frequency(self, max_freq: int) -> None: + """Set minimum CPU frequency percentage. + + Args: + max_freq: Minimum frequency percentage (0-100) + """ path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" self._prev_min_freq = execute("cat " + path) execute("echo {freq} | sudo tee {path}".format(freq=max_freq, path=path)) - def unset_frequency(self): + def unset_frequency(self) -> None: + """Restore previous minimum CPU frequency setting. + + Restores the frequency setting that was saved when set_frequency + was called. + """ path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" execute("echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path)) - def setup_benchmarking(self, cores: List[int]): + def setup_benchmarking(self, cores: List[int]) -> None: + """Set up the environment for stable benchmarking. + + This method applies a standard set of optimizations to create + a stable environment for benchmarking: + - Disables CPU boost/turbo + - Disables hyperthreading + - Sets CPU frequency to maximum + - Drops page cache + + Args: + cores: List of physical core IDs to configure + """ self.disable_boost(cores) self.disable_hyperthreading(cores) self.set_frequency(100) self.drop_page_cache() - def after_benchmarking(self, cores: List[int]): + def after_benchmarking(self, cores: List[int]) -> None: + """Restore environment settings after benchmarking. + + This method restores the system to its previous state after + benchmarking is complete: + - Re-enables CPU boost/turbo + - Re-enables hyperthreading + - Restores frequency settings + + Args: + cores: List of physical core IDs to restore + """ self.enable_boost(cores) self.enable_hyperthreading(cores) self.unset_frequency() diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 30fe7274..fd64efd0 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -1,8 +1,25 @@ +"""Container eviction model experiment implementation. + +This module provides the EvictionModel experiment implementation, which +measures how serverless platforms manage function container eviction. +It determines how long idle containers are kept alive before being +recycled by the platform, which affects cold start frequency. + +The experiment involves invoking functions at increasing time intervals +and observing when cold starts occur, thus inferring the platform's +container caching and eviction policies. + +This implemnetation is slightly different than the original one, +which used the 010.sleep benchmark. Here, we use the 040.server-reply +to double check that all functions are "alive" at the same time. +However, the sleep logic is not currently implemented in 040.server-reply. +""" + import logging import os import time from datetime import datetime -from typing import List, Optional, Tuple, TYPE_CHECKING +from typing import List, Optional, Tuple, TYPE_CHECKING, Dict, Any import multiprocessing from multiprocessing.pool import AsyncResult, ThreadPool @@ -17,7 +34,28 @@ class EvictionModel(Experiment): - + """Container eviction model experiment. + + This experiment measures how serverless platforms manage function + container eviction. It determines how long idle containers are kept + alive before being recycled by the platform, which affects cold start + frequency. 
+ + The experiment invokes functions at different time intervals (defined + in the 'times' list) and observes when cold starts occur, thus inferring + the platform's container caching and eviction policies. + + Attributes: + times: List of time intervals (in seconds) between invocations + _function: Function to invoke + _trigger: Trigger to use for invocation + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + + # Time intervals (in seconds) between invocations + # Uncomment additional intervals as needed for longer tests times = [ 1, # 2, @@ -43,19 +81,46 @@ class EvictionModel(Experiment): function_copies_per_time = 1 def __init__(self, config: ExperimentConfig): + """Initialize a new EvictionModel experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "eviction-model" + """ return "eviction-model" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.EvictionModel" + """ return "Experiment.EvictionModel" @staticmethod - def accept_replies(port: int, invocations: int): + def accept_replies(port: int, invocations: int) -> None: + """Accept TCP connections from functions and respond to them. + + This static method acts as a TCP server, accepting connections from + functions and responding to them. It runs two rounds of connection + acceptance to ensure functions receive a response. The method logs + all activity to a file. + + This is used by the '040.server-reply' benchmark to confirm function execution. + Args: + port: TCP port to listen on + invocations: Number of expected function invocations + """ with open(f"server_{invocations}.log", "w") as f: import socket @@ -95,7 +160,31 @@ def accept_replies(port: int, invocations: int): s.close() @staticmethod - def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict): + def execute_instance( + sleep_time: int, pid: int, tid: int, func: Function, payload: dict + ) -> dict: + """Execute a single instance of the eviction model test. + + This method performs two invocations of a function with a sleep interval + between them. The first invocation should be a cold start, and the second + will indicate whether the container was evicted during the sleep period. + + This function is intended to be run in a separate thread; it performs two + synchronous HTTP invocations of the given function. 
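The core of the measurement described above is a two-invocation probe. The simplified sketch below is not the actual execute_instance implementation; it only illustrates how a cold start on the second call reveals eviction, assuming a trigger with sync_invoke and an execution result exposing stats.cold_start.

```python
import time

def probe_eviction(trigger, payload: dict, sleep_time: int) -> bool:
    """Hypothetical helper: return True if the container was evicted while idle."""
    trigger.sync_invoke(payload)           # warm the container (typically a cold start itself)
    time.sleep(sleep_time)                 # keep the function idle for the probed interval
    second = trigger.sync_invoke(payload)
    return second.stats.cold_start         # cold start here implies eviction during the idle period
```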
+ + Args: + sleep_time: Time to sleep between invocations (seconds) + pid: Process ID for logging + tid: Thread ID for logging + func: Function to invoke + payload: Payload to send to the function + + Returns: + Dictionary with invocation results and timing information + + Raises: + RuntimeError: If the first invocation fails + """ try: print(f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!") @@ -113,7 +202,7 @@ def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payloa time_spent = float(datetime.now().strftime("%s.%f")) - float(end.strftime("%s.%f")) seconds_sleep = sleep_time - time_spent - print(f"PID {pid} TID {tid} with time {time}, sleep {seconds_sleep}") + print(f"PID {pid} TID {tid} with time {sleep_time}, sleep {seconds_sleep}") time.sleep(seconds_sleep) try: @@ -139,7 +228,27 @@ def process_function( functions: List[Function], times: List[int], payload: dict, - ): + ) -> List[dict]: + """Process a function with multiple time intervals. + + This method executes multiple functions with different sleep times + in parallel, starting with the largest sleep time to overlap executions. + The total time should be equal to the maximum execution time. + + Args: + repetition: Current repetition number + pid: Process ID for logging + invocations: Number of invocations to perform + functions: List of functions to invoke + times: List of sleep times corresponding to functions + payload: Payload to send to functions + + Returns: + List of dictionaries containing invocation results + + Raises: + RuntimeError: If any execution fails + """ b = multiprocessing.Semaphore(invocations) print(f"Begin at PID {pid}, repetition {repetition}") @@ -176,31 +285,55 @@ def process_function( raise RuntimeError() return final_results - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: + """Prepare the experiment for execution. + This method sets up the benchmark, functions, and output directory for + the experiment. Retrieves the '040.server-reply' benchmark, sets up result storage, + and creates a separate function for each time interval and copy combination, + allowing for parallel testing of different eviction times. + + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Get the server-reply benchmark self._benchmark = sebs_client.get_benchmark( "040.server-reply", deployment_client, self.config ) self._deployment_client = deployment_client self._result = ExperimentResult(self.config, deployment_client.config) + + # Create function names for each time interval and copy name = deployment_client.default_function_name(self._benchmark) self.functions_names = [ f"{name}-{time}-{copy}" for time in self.times for copy in range(self.function_copies_per_time) ] + + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "eviction-model") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) + self.functions = [] for fname in self.functions_names: - # if self._benchmark.functions and fname in self._benchmark.functions: - # self.logging.info(f"Skip {fname}, exists already.") - # continue self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) - def run(self): + def run(self) -> None: + """Execute the eviction model experiment. + + This method runs the main eviction model experiment by: + 1. Setting up server instances to handle function responses + 2. 
Executing parallel invocations with different sleep times + 3. Collecting and storing results + + The experiment determines container eviction patterns by measuring + whether functions experience cold starts after different idle periods. + """ settings = self.config.experiment_settings(self.name()) invocations = settings["invocations"] @@ -216,7 +349,7 @@ def run(self): # flake8 issue # https://github.com/PyCQA/pycodestyle/issues/373 functions = self.functions[invocation_idx :: self.function_copies_per_time] # noqa - results = {} + results: Dict[int, List[List[Dict[str, Any]]]] = {} # Disable logging - otherwise we have RLock that can't get be pickled for func in functions: @@ -249,8 +382,8 @@ def run(self): # time.sleep(5) for _, t in enumerate(self.times): results[t].append([]) - local_results = [] - servers_results = [] + local_results: List[AsyncResult] = [] + servers_results: List[AsyncResult] = [] """ Start M server instances. Each one handles one set of invocations. @@ -281,11 +414,11 @@ def run(self): Rethrow exceptions if appear """ for result in servers_results: - ret = result.get() + result.get() for result in local_results: - ret = result.get() - for i, val in enumerate(ret): + local_ret = result.get() + for i, val in enumerate(local_ret): results[self.times[i]][-1].append(val) """ @@ -295,15 +428,5 @@ def run(self): # verify_results(results) with open(os.path.join(self._out_dir, fname), "w") as out_f: - # print(results) print(f"Write results to {os.path.join(self._out_dir, fname)}") out_f.write(serialize(results)) - # func = self._deployment_client.get_function( - # self._benchmark, self.functions_names[0] - # ) - # self._deployment_client.enforce_cold_start(func) - # ret = func.triggers[0].async_invoke(payload) - # result = ret.result() - # print(result.stats.cold_start) - # self._result.add_invocation(func, result) - # print(serialize(self._result)) diff --git a/sebs/experiments/experiment.py b/sebs/experiments/experiment.py index ee5a456f..86c28732 100644 --- a/sebs/experiments/experiment.py +++ b/sebs/experiments/experiment.py @@ -1,5 +1,19 @@ -from abc import ABC -from abc import abstractmethod +"""Base abstract class for implementing serverless benchmark experiments. + +This module provides the base Experiment abstract class that defines the common +interface and functionality for all benchmark experiments in the serverless +benchmarking suite. Each experiment type inherits from this class and implements +its specific logic for executing benchmarks, measuring performance, and analyzing +results. + +The Experiment class handles: +- Configuration management +- Parallel invocation coordination +- Logging setup +- Type and name identification for experiments +""" + +from abc import ABC, abstractmethod from multiprocessing import Semaphore # from multiprocessing.pool import ThreadPool @@ -9,7 +23,26 @@ class Experiment(ABC, LoggingBase): + """Abstract base class for all serverless benchmark experiments. + + This class provides the common functionality and interface for all + experiment implementations. It manages configuration, handles logging, + and defines the abstract methods that must be implemented by specific + experiment types. + + Attributes: + config: Experiment configuration settings + _threads: Number of concurrent threads to use for the experiment + _invocations: Number of function invocations to perform + _invocation_barrier: Semaphore for coordinating parallel invocations + """ + def __init__(self, cfg: ExperimentConfig): + """Initialize a new experiment. 
+ + Args: + cfg: Experiment configuration settings + """ super().__init__() self._config = cfg self._threads = 1 @@ -17,15 +50,38 @@ def __init__(self, cfg: ExperimentConfig): self._invocation_barrier = Semaphore(self._invocations) @property - def config(self): + def config(self) -> ExperimentConfig: + """Get the experiment configuration. + + Returns: + The experiment configuration + """ return self._config @staticmethod @abstractmethod def name() -> str: + """Get the name of the experiment. + + This method must be implemented by all subclasses to return + a unique name for the experiment type, which is used for + configuration and identification. + + Returns: + A string name for the experiment + """ pass @staticmethod @abstractmethod def typename() -> str: + """Get the type name of the experiment. + + This method must be implemented by all subclasses to return + a human-readable type name for the experiment, which is used + for display and reporting. + + Returns: + A string type name for the experiment + """ pass diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index d7fc56f7..efe502ad 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -1,9 +1,26 @@ +"""Invocation overhead measurement experiment implementation. + +This module provides the InvocationOverhead experiment implementation, which +measures the overhead associated with invoking serverless functions. It can +measure: + +- Overhead of different invocation methods (HTTP, SDK) +- Impact of code package size on deployment and invocation time +- Overhead of different input data sizes +- Cold vs. warm start invocation times + +The experiment is designed to help identify performance bottlenecks and +optimize function deployment and invocation. +We deploy microbenchmark 030.clock-synchronization to exactly measure the +network latency between client and function. +""" + import csv import os import random import time from datetime import datetime -from typing import Dict, TYPE_CHECKING +from typing import Dict, List, TYPE_CHECKING, Union from sebs.benchmark import Benchmark from sebs.faas.system import System as FaaSSystem @@ -15,10 +32,33 @@ class CodePackageSize: + """Helper class for code package size experiments. + + This class handles creating and deploying functions with different code + package sizes to measure the impact of package size on deployment and + invocation overhead. + + Attributes: + _benchmark_path: Path to the benchmark code + _benchmark: Benchmark instance + _deployment_client: Deployment client to use + sizes: List of code package sizes to test + functions: Dictionary mapping size to function instances + """ + def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings: dict): + """Initialize a new code package size experiment. 
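For orientation, a minimal hypothetical subclass satisfying the abstract interface described above could look as follows (the experiment name and class are invented):

```python
from sebs.experiments.config import Config as ExperimentConfig
from sebs.experiments.experiment import Experiment


class WarmupProbe(Experiment):
    """Hypothetical experiment, shown only to illustrate the required interface."""

    def __init__(self, cfg: ExperimentConfig):
        super().__init__(cfg)

    @staticmethod
    def name() -> str:
        return "warmup-probe"

    @staticmethod
    def typename() -> str:
        return "Experiment.WarmupProbe"
```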
+ + Args: + deployment_client: Deployment client to use + benchmark: Benchmark instance + settings: Experiment settings with code_package_begin, code_package_end, + and code_package_points values + """ import math from numpy import linspace + # Generate code package sizes to test points = linspace( settings["code_package_begin"], settings["code_package_end"], @@ -26,6 +66,7 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings ) from sebs.utils import find_benchmark + # Use the clock synchronization benchmark as a base self._benchmark_path = find_benchmark("030.clock-synchronization", "benchmarks") self._benchmark = benchmark random.seed(1410) @@ -37,7 +78,16 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings self._deployment_client = deployment_client self._benchmark = benchmark - def before_sample(self, size: int, input_benchmark: dict): + def before_sample(self, size: int, input_benchmark: dict) -> None: + """Prepare the benchmark with a specific code package size. + + Creates a file named 'randomdata.bin' with the specified size of random bytes + within the benchmark's code package. Then, updates the function on the deployment. + + Args: + size: Size of the code package to create + input_benchmark: Benchmark input configuration (unused) + """ arr = bytearray((random.getrandbits(8) for i in range(size))) self._benchmark.code_package_modify("randomdata.bin", bytes(arr)) function = self._deployment_client.get_function(self._benchmark) @@ -45,7 +95,22 @@ def before_sample(self, size: int, input_benchmark: dict): class PayloadSize: - def __init__(self, settings: dict): + """Helper class for payload size experiments. + + This class handles creating different payload sizes to measure the impact + of input data size on function invocation overhead. + + Attributes: + pts: List of payload sizes to test + """ + + def __init__(self, settings: dict) -> None: + """Initialize a new payload size experiment. + + Args: + settings: Experiment settings with payload_begin, payload_end, + and payload_points values + """ from numpy import linspace points = linspace( @@ -55,7 +120,15 @@ def __init__(self, settings: dict): ) self.pts = [int(pt) for pt in points] - def before_sample(self, size: int, input_benchmark: dict): + def before_sample(self, size: int, input_benchmark: dict) -> None: + """Prepare the benchmark input with a specific payload size. + + Generates different payload sizes by creating base64 encoded byte arrays. + + Args: + size: Size of the payload to create + input_benchmark: Benchmark input configuration to modify + """ import base64 from io import BytesIO @@ -65,23 +138,59 @@ def before_sample(self, size: int, input_benchmark: dict): class InvocationOverhead(Experiment): + """Invocation overhead measurement experiment. + + This experiment measures the overhead associated with invoking serverless + functions. It can measure the impact of code package size, input data size, + and different invocation methods on performance. + + Attributes: + settings: Experiment-specific settings + _benchmark: Benchmark to use + benchmark_input: Input data for the benchmark + _storage: Storage service to use + _function: Function to invoke + _code_package: Code package size experiment helper + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new InvocationOverhead experiment. 
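Both helper classes above derive their test points with numpy's linspace; as a quick illustration of what that yields (the begin/end/points values here are made up):

```python
from numpy import linspace

# Hypothetical settings: five code-package sizes between 1 KiB and 1 MiB.
settings = {
    "code_package_begin": 1024,
    "code_package_end": 1024 * 1024,
    "code_package_points": 5,
}
points = [
    int(pt)
    for pt in linspace(
        settings["code_package_begin"],
        settings["code_package_end"],
        settings["code_package_points"],
    )
]
# -> [1024, 262912, 524800, 786688, 1048576]
```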
+ + Args: + config: Experiment configuration + """ super().__init__(config) self.settings = self.config.experiment_settings(self.name()) - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: + """Prepare the experiment for execution. + + This method sets up the benchmark, function, storage, and output directory + for the experiment. It uses the clock-synchronization benchmark as a base + and prepares the necessary resources for measuring invocation overhead. - # deploy network test function + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Import needed modules from sebs import SeBS # noqa from sebs.faas.function import Trigger + # Get the clock-synchronization benchmark self._benchmark = sebs_client.get_benchmark( "030.clock-synchronization", deployment_client, self.config ) + # Prepare benchmark input self.benchmark_input = self._benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) + + # Get storage for testing self._storage = deployment_client.system_resources.get_storage(replace_existing=True) self._function = deployment_client.get_function(self._benchmark) @@ -102,7 +211,18 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._deployment_client = deployment_client - def run(self): + def run(self) -> None: + """Execute the invocation overhead experiment. + + This method runs the main experiment by: + 1. Setting up either code package size or payload size experiments + 2. Running warm-up and cold start invocations + 3. Measuring invocation overhead for different sizes + (either code package or payload, based on settings) + 4. Collecting and storing results in CSV format, + including client-side and server-side timestamps + + """ from requests import get @@ -110,6 +230,7 @@ def run(self): repetitions = self.settings["repetitions"] N = self.settings["N"] + experiment: Union[CodePackageSize, PayloadSize] if self.settings["type"] == "code": experiment = CodePackageSize(self._deployment_client, self._benchmark, self.settings) else: @@ -175,7 +296,21 @@ def process( directory: str, logging_filename: str, extend_time_interval: int, - ): + ) -> None: + """Process experiment results and generate summary statistics. + + This method processes the raw experiment results by: + 1. Loading client-side timing data from CSV files + and server-side UDP datagram timestamps + 2. Computing clock drift and Round-Trip Time (RTT) + 3. Creating a processed results file with invocation times + + Args: + sebs_client: SeBS client instance + deployment_client: Deployment client instance + directory: Directory containing experiment results + logging_filename: Name of the logging file (unused) + """ import pandas as pd import glob from sebs import SeBS # noqa @@ -237,7 +372,32 @@ def process( invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) - def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str): + def receive_datagrams( + self, input_benchmark: dict, repetitions: int, port: int, ip: str + ) -> List: + """Receive UDP datagrams from the function for clock synchronization. + + This method implements a UDP server that communicates with the function + to measure clock synchronization and network timing. 
+ It opens a UDP socket, triggers an asynchronous function invocation, and then + listens for a specified number of datagrams, recording timestamps for + received and sent datagrams. + + Saves server-side timestamps to a CSV file named `server-{request_id}.csv`. + + Args: + input_benchmark: Benchmark input configuration + repetitions: Number of repetitions to perform + port: UDP port to listen on + ip: IP address of the client + + Returns: + List containing invocation results: [is_cold, connection_time, + start_timestamp, finish_timestamp, request_id] + + Raises: + RuntimeError: If function invocation fails + """ import socket @@ -306,8 +466,18 @@ def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "invocation-overhead" + """ return "invocation-overhead" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.InvocOverhead" + """ return "Experiment.InvocOverhead" diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index 6c44f848..640ab7bd 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -1,3 +1,11 @@ +"""Network latency and throughput measurement experiment implementation. + +This module provides the NetworkPingPong experiment implementation, which +measures network latency and throughput characteristics between client and +serverless functions. It determines various latency characteristics of the network +connection in the cloud. +""" + import csv import socket import os @@ -20,35 +28,81 @@ class NetworkPingPong(Experiment): + """Network latency and throughput measurement experiment. + + This experiment measures the network RTT (Round-Trip Time) using a ping-pong mechanism. + Deploys the '020.network-benchmark' which echoes back UDP datagrams. + The experiment sends a series of datagrams and measures the time taken + for each to return. This experiment measures the network performance characteristics + between the client and serverless functions. + + + Attributes: + benchmark_input: Input configuration for the benchmark + _storage: Storage service to use for testing + _function: Function to invoke + _triggers: Dictionary of triggers by type + _out_dir: Directory for storing results + _deployment_client: Deployment client to use + _sebs_client: SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new NetworkPingPong experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: + """Prepare the experiment for execution. + This method sets up the '020.network-benchmark' benchmark, triggers, storage, + and output directory for the experiment. It creates or gets the function and + its HTTP trigger, and prepares the input data for the benchmark. 
+ + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Get the network benchmark benchmark = sebs_client.get_benchmark( "020.network-benchmark", deployment_client, self.config ) + # Prepare benchmark input self.benchmark_input = benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=True ) + + # Get storage for testing storage latency self._storage = deployment_client.system_resources.get_storage(replace_existing=True) + # Get or create function self._function = deployment_client.get_function(benchmark) + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "network-ping-pong") if not os.path.exists(self._out_dir): # shutil.rmtree(self._out_dir) os.mkdir(self._out_dir) + # Make sure there's an HTTP trigger triggers = self._function.triggers(Trigger.TriggerType.HTTP) if len(triggers) == 0: deployment_client.create_trigger(self._function, Trigger.TriggerType.HTTP) - def run(self): + def run(self) -> None: + """Run the network ping-pong experiment. + This method executes the experiment, measuring network latency and + throughput between the client and the serverless function. It first + determines the client's public IP address to include in the results. + """ from requests import get + # Get the client's public IP address ip = get("http://checkip.amazonaws.com/").text.rstrip() settings = self.config.experiment_settings(self.name()) invocations = settings["invocations"] @@ -66,9 +120,17 @@ def run(self): time.sleep(5) self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) - def process(self, directory: str): + def process(self, directory: str) -> None: + """Process the experiment results. + + This method processes the CSV files generated during the experiment + execution, computes round-trip times (RTT), and generates summary + statistics and a histogram of the RTT distribution. - full_data: Dict[str, pd.Dataframe] = {} + Args: + directory: Directory containing the experiment results + """ + full_data: Dict[str, pd.DataFrame] = {} for f in glob.glob(os.path.join(directory, "network-ping-pong", "*.csv")): request_id = os.path.basename(f).split("-", 1)[1].split(".")[0] @@ -93,8 +155,18 @@ def process(self, directory: str): fig = ax.get_figure() fig.savefig(os.path.join(directory, "histogram.png")) - def receive_datagrams(self, repetitions: int, port: int, ip: str): + def receive_datagrams(self, repetitions: int, port: int, ip: str) -> None: + """Receive UDP datagrams from the function and respond to them. + This method acts as a UDP server, receiving datagrams from the function + and responding to them. It measures the timestamps of packet reception + and response, and records them for later analysis. + + Args: + repetitions: Number of repetitions to execute + port: UDP port to listen on + ip: IP address to include in the function invocation input + """ print(f"Starting invocation with {repetitions} repetitions on port {port}") socket.setdefaulttimeout(2) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) @@ -143,8 +215,18 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str): @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "network-ping-pong" + """ return "network-ping-pong" @staticmethod def typename() -> str: + """Get the type name of the experiment. 
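The server-side loop described above can be pictured with the simplified sketch below; it is hypothetical and omits the trigger invocation, timeout handling, and CSV output of the real receive_datagrams methods.

```python
import socket
from datetime import datetime

def echo_datagrams(port: int, repetitions: int) -> list:
    """Simplified ping-pong server: echo each datagram and record timestamps."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind(("", port))
    sock.settimeout(4)
    timestamps = []
    for i in range(repetitions):
        message, address = sock.recvfrom(1024)
        recv_ts = datetime.now().timestamp()
        sock.sendto(message, address)        # send the "pong" back to the function
        send_ts = datetime.now().timestamp()
        timestamps.append((i, recv_ts, send_ts))
    sock.close()
    return timestamps
```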
+ + Returns: + The type name "Experiment.NetworkPingPong" + """ return "Experiment.NetworkPingPong" diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8d..faa669a5 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -1,3 +1,18 @@ +"""Performance and cost measurement experiment implementation. + +This module provides the PerfCost experiment implementation, which measures +the performance characteristics and execution costs of serverless functions. +It can run several experiment types: + +- Cold: Measures cold start performance by enforcing container recreation +- Warm: Measures warm execution performance with reused containers +- Burst: Measures performance under concurrent burst load +- Sequential: Measures performance with sequential invocations + +The experiment collects detailed metrics about execution time, memory usage, +and costs, and provides statistical analysis of the results. +""" + import json import os import time @@ -19,44 +34,103 @@ class PerfCost(Experiment): + """Performance and cost measurement experiment. + + This experiment measures the performance characteristics and execution + costs of serverless functions under different execution conditions. + It can measure cold starts, warm execution, burst load, and sequential + execution patterns. + + The experiment can be configured to run with different memory sizes, + allowing for comparison of performance across different resource allocations. + + Attributes: + _benchmark: The benchmark to execute + _benchmark_input: The input data for the benchmark + _function: The function to invoke + _trigger: The trigger to use for invocation + _out_dir: Directory for storing results + _deployment_client: The deployment client to use + _sebs_client: The SeBS client + """ + def __init__(self, config: ExperimentConfig): + """Initialize a new PerfCost experiment. + + Args: + config: Experiment configuration + """ super().__init__(config) @staticmethod def name() -> str: + """Get the name of the experiment. + + Returns: + The name "perf-cost" + """ return "perf-cost" @staticmethod def typename() -> str: + """Get the type name of the experiment. + + Returns: + The type name "Experiment.PerfCost" + """ return "Experiment.PerfCost" class RunType(Enum): + """Types of experiment runs. + + This enum defines the different types of experiment runs: + - WARM: Measure warm execution performance (reused containers) + - COLD: Measure cold start performance (new containers) + - BURST: Measure performance under concurrent burst load + - SEQUENTIAL: Measure performance with sequential invocations + """ + WARM = 0 COLD = 1 BURST = 2 SEQUENTIAL = 3 def str(self) -> str: + """Get the string representation of the run type. + + Returns: + The lowercase name of the run type + """ return self.name.lower() - def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem) -> None: + """Prepare the experiment for execution. - # create benchmark instance + This method sets up the benchmark, function, trigger, and output + directory for the experiment. It creates or gets the function and + its HTTP trigger, and prepares the input data for the benchmark. 
+ + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + """ + # Create benchmark instance settings = self.config.experiment_settings(self.name()) self._benchmark = sebs_client.get_benchmark( settings["benchmark"], deployment_client, self.config ) - # prepare benchmark input + # Prepare benchmark input self._benchmark_input = self._benchmark.prepare_input( deployment_client.system_resources, size=settings["input-size"], replace_existing=self.config.update_storage, ) + # Get or create function self._function = deployment_client.get_function(self._benchmark) - # add HTTP trigger + # Add HTTP trigger if not already present triggers = self._function.triggers(Trigger.TriggerType.HTTP) if len(triggers) == 0: self._trigger = deployment_client.create_trigger( @@ -65,33 +139,67 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): else: self._trigger = triggers[0] + # Create output directory self._out_dir = os.path.join(sebs_client.output_dir, "perf-cost") if not os.path.exists(self._out_dir): os.mkdir(self._out_dir) + + # Save clients for later use self._deployment_client = deployment_client self._sebs_client = sebs_client - def run(self): + def run(self) -> None: + """Run the experiment. + This method runs the experiment with the configured settings. + If memory sizes are specified, it runs the experiment for each + memory size, updating the function configuration accordingly. + Otherwise, it runs the experiment once with the default memory + configuration. + """ settings = self.config.experiment_settings(self.name()) - # Execution on systems where memory configuration is not provided + # Get memory sizes to test memory_sizes = settings["memory-sizes"] + + # Run with default memory if no specific sizes are provided if len(memory_sizes) == 0: - self.logging.info("Begin experiment") + self.logging.info("Begin experiment with default memory configuration") self.run_configuration(settings, settings["repetitions"]) + + # Run for each specified memory size for memory in memory_sizes: self.logging.info(f"Begin experiment on memory size {memory}") + # Update function memory configuration self._function.config.memory = memory - self._deployment_client.update_function(self._function, self._benchmark, False, "") + self._deployment_client.update_function( + self._function, + self._benchmark, + self._benchmark.container_deployment, + self._benchmark.container_uri if self._benchmark.container_deployment else "", + ) self._sebs_client.cache_client.update_function(self._function) + # Run experiment with this memory configuration self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) - def compute_statistics(self, times: List[float]): + def compute_statistics(self, times: List[float]) -> None: + """Compute statistical analysis of execution times. + + This method computes basic statistics (mean, median, standard deviation, + coefficient of variation) and confidence intervals for the given times. + It computes both parametric (Student's t-distribution) and non-parametric + confidence intervals. 
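The settings consumed by prepare() and run() above come from the experiment section of the configuration file. A plausible entry is sketched below; the keys match those read in the code shown here, while the values (and any additional keys such as invocation counts) are illustrative assumptions.

```python
# Illustrative "perf-cost" section of the experiment configuration.
perf_cost_settings = {
    "benchmark": "110.dynamic-html",
    "input-size": "test",
    "repetitions": 50,
    "memory-sizes": [128, 256, 512],   # empty list -> a single run with the default memory
    "experiments": ["cold", "warm", "burst", "sequential"],
}
```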
+ Args: + times: List of execution times in milliseconds + """ + # Compute basic statistics mean, median, std, cv = basic_stats(times) self.logging.info(f"Mean {mean} [ms], median {median} [ms], std {std}, CV {cv}") + + # Compute confidence intervals for different confidence levels for alpha in [0.95, 0.99]: + # Parametric confidence interval (Student's t-distribution) ci_interval = ci_tstudents(alpha, times) interval_width = ci_interval[1] - ci_interval[0] ratio = 100 * interval_width / mean / 2.0 @@ -100,6 +208,8 @@ def compute_statistics(self, times: List[float]): f"{ci_interval[0]} to {ci_interval[1]}, within {ratio}% of mean" ) + # Non-parametric confidence interval (Le Boudec's method) + # Only compute if we have enough samples (> 20) if len(times) > 20: ci_interval = ci_le_boudec(alpha, times) interval_width = ci_interval[1] - ci_interval[0] @@ -116,7 +226,21 @@ def _run_configuration( invocations: int, repetitions: int, suffix: str = "", - ): + ) -> None: + """Run a specific experiment configuration. + + This method executes the experiment with the specified run type, + collecting and recording the results. It handles different run types + (cold, warm, burst, sequential) appropriately, enforcing cold starts + when needed and collecting execution statistics. + + Args: + run_type: Type of run (cold, warm, burst, sequential) + settings: Experiment settings + invocations: Number of concurrent invocations + repetitions: Total number of repetitions to run + suffix: Optional suffix for output file names (e.g., memory size) + """ # Randomize starting value to ensure that it's not the same # as in the previous run. @@ -226,10 +350,25 @@ def _run_configuration( ) ) - def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): + def run_configuration(self, settings: dict, repetitions: int, suffix: str = "") -> None: + """Run experiments for each configured experiment type. + + This method runs the experiment for each experiment type specified + in the settings. It dispatches to the appropriate run type handler + for each experiment type. + Args: + settings: Experiment settings + repetitions: Number of repetitions to run + suffix: Optional suffix for output file names (e.g., memory size) + + Raises: + RuntimeError: If an unknown experiment type is specified + """ + # Run each configured experiment type for experiment_type in settings["experiments"]: if experiment_type == "cold": + # Cold start experiments - enforce container recreation self._run_configuration( PerfCost.RunType.COLD, settings, @@ -238,6 +377,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "warm": + # Warm execution experiments - reuse containers self._run_configuration( PerfCost.RunType.WARM, settings, @@ -246,6 +386,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "burst": + # Burst load experiments - concurrent invocations self._run_configuration( PerfCost.RunType.BURST, settings, @@ -254,6 +395,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): suffix, ) elif experiment_type == "sequential": + # Sequential invocation experiments - one at a time self._run_configuration( PerfCost.RunType.SEQUENTIAL, settings, 1, repetitions, suffix ) @@ -267,7 +409,21 @@ def process( directory: str, logging_filename: str, extend_time_interval: int, - ): + ) -> None: + """Process experiment results and generate a CSV report. 
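For reference, the parametric interval mentioned above is the usual Student's t construction. A standalone sketch using scipy (not the SeBS ci_tstudents helper, and with made-up sample values) is:

```python
import numpy as np
from scipy import stats

times = [230.1, 228.4, 232.9, 229.7, 231.3]   # execution times in ms (made-up sample)
mean = np.mean(times)
low, high = stats.t.interval(0.95, len(times) - 1, loc=mean, scale=stats.sem(times))
ratio = 100 * (high - low) / mean / 2.0        # CI half-width as a percentage of the mean
print(f"95% CI: {low:.2f} to {high:.2f} ms, within {ratio:.2f}% of mean")
```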
+ + This method processes the experiment results, downloads additional + metrics if needed, and generates a CSV report with the results. + The report includes memory usage, execution times, and other metrics + for each experiment type and invocation. + + Args: + sebs_client: The SeBS client to use + deployment_client: The deployment client to use + directory: Directory where results are stored + logging_filename: Filename for logs + extend_time_interval: Time interval to extend metrics retrieval by (in minutes) + """ import glob import csv @@ -334,7 +490,8 @@ def process( times = experiments.times() deployment_client.download_metrics( func, - *times, + int(times[0]), + int(times[1]), experiments.invocations(func), experiments.metrics(func), ) diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index b28de75c..a8cb9c7c 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -1,3 +1,15 @@ +"""Experiment result collection and management. + +This module provides the Result class for managing experiment results, including: +- Function invocation results +- Metrics from cloud providers +- Experiment start and end times +- Configuration information + +The Result class handles serialization, deserialization, and analysis of +experiment results, making it easier to process and visualize the data. +""" + from datetime import datetime from typing import Dict, List, Optional, Tuple # noqa @@ -9,6 +21,23 @@ class Result: + """Experiment result collection and management. + + This class stores and manages the results of experiments, including function + invocation results, metrics from cloud providers, and configuration information. + It provides methods for adding invocation results, retrieving metrics, and + serializing/deserializing results. + + Attributes: + config: Dictionary containing experiment and deployment configurations + _invocations: Dictionary mapping function names to invocation results + _metrics: Dictionary mapping function names to metrics + _start_time: Experiment start time + _end_time: Experiment end time + result_bucket: Optional bucket name for storing results + logging_handlers: Logging handlers for the result + """ + def __init__( self, experiment_config: ExperimentConfig, @@ -17,6 +46,15 @@ def __init__( metrics: Optional[Dict[str, dict]] = None, result_bucket: Optional[str] = None, ): + """Initialize a new experiment result. + + Args: + experiment_config: Experiment configuration + deployment_config: Deployment configuration + invocations: Optional dictionary of function invocation results + metrics: Optional dictionary of function metrics + result_bucket: Optional bucket name for storing results + """ self.config = { "experiments": experiment_config, "deployment": deployment_config, @@ -31,43 +69,111 @@ def __init__( self._metrics = metrics self.result_bucket = result_bucket - def begin(self): + def begin(self) -> None: + """Mark the beginning of the experiment. + + This method records the start time of the experiment. + """ self.begin_time = datetime.now().timestamp() - def end(self): + def end(self) -> None: + """Mark the end of the experiment. + + This method records the end time of the experiment. + """ self.end_time = datetime.now().timestamp() - def times(self) -> Tuple[int, int]: + def times(self) -> Tuple[float, float]: + """Get the start and end times of the experiment. 
+ + Returns: + Tuple of (start_time, end_time) as Unix timestamps + """ return self.begin_time, self.end_time - def add_result_bucket(self, result_bucket: str): + def add_result_bucket(self, result_bucket: str) -> None: + """Set the result bucket for storing experiment results. + + Args: + result_bucket: Name of the bucket to store results in + """ self.result_bucket = result_bucket - def add_invocation(self, func: Function, invocation: ExecutionResult): - # the function has most likely failed, thus no request id + def add_invocation(self, func: Function, invocation: ExecutionResult) -> None: + """Add an invocation result for a specific function. + + If the invocation doesn't have a request ID (likely due to failure), + a synthetic ID is generated. + + Args: + func: Function the invocation belongs to + invocation: Execution result to add + """ + # The function has most likely failed, thus no request id if invocation.request_id: req_id = invocation.request_id else: req_id = f"failed-{len(self._invocations.get(func.name, []))}" + # Add to existing invocations or create new entry if func.name in self._invocations: self._invocations.get(func.name)[req_id] = invocation # type: ignore else: self._invocations[func.name] = {req_id: invocation} def functions(self) -> List[str]: + """Get a list of all function names in the results. + + Returns: + List of function names + """ return list(self._invocations.keys()) def invocations(self, func: str) -> Dict[str, ExecutionResult]: + """Get invocation results for a specific function. + + Args: + func: Name of the function to get invocation results for + + Returns: + Dictionary mapping request IDs to execution results + + Raises: + KeyError: If function name is not found in results + """ return self._invocations[func] def metrics(self, func: str) -> dict: + """Get metrics for a specific function. + + If no metrics exist for the function, an empty dictionary is created + and returned. + + Args: + func: Name of the function to get metrics for + + Returns: + Dictionary of metrics for the function + """ if func not in self._metrics: self._metrics[func] = {} return self._metrics[func] @staticmethod def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": + """Deserialize a result from a dictionary representation. + + This static method creates a new Result object from a dictionary + representation, which may have been loaded from a file or cache. 
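A sketch of how the Result container above is typically filled during an experiment run; function, trigger, payload, and repetitions are assumed to come from the surrounding experiment code, and sync_invoke stands in for whatever invocation path is used.

```python
# Hypothetical usage of the Result container documented above.
result = ExperimentResult(experiment_config, deployment_client.config)
result.begin()
for _ in range(repetitions):
    execution = trigger.sync_invoke(payload)     # assumed invocation path
    result.add_invocation(function, execution)
result.end()

begin_ts, end_ts = result.times()
per_request = result.invocations(function.name)  # request id -> ExecutionResult
```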
+ + Args: + cached_config: Dictionary representation of the result + cache: Cache instance for resolving references + handlers: Logging handlers for the result + + Returns: + A new Result object with settings from the dictionary + """ invocations: Dict[str, dict] = {} for func, func_invocations in cached_config["_invocations"].items(): invocations[func] = {} diff --git a/sebs/experiments/startup_time.py b/sebs/experiments/startup_time.py deleted file mode 100644 index 3b7e9520..00000000 --- a/sebs/experiments/startup_time.py +++ /dev/null @@ -1,15 +0,0 @@ -from sebs.experiments.experiment import Experiment -from sebs.experiments.config import Config as ExperimentConfig - - -class StartupTime(Experiment): - def __init__(self, config: ExperimentConfig): - super().__init__(config) - - @staticmethod - def name() -> str: - return "startup-time" - - @staticmethod - def typename() -> str: - return "Experiment.StartupTime" diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 19c7d3ab..1187137f 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -1,3 +1,30 @@ +"""Configuration management for Function-as-a-Service (FaaS) systems. + +This module provides abstract base classes for managing configurations across +different FaaS platforms (AWS Lambda, Azure Functions, Google Cloud Functions, +OpenWhisk, etc.). It defines the core interfaces for: + +- Credentials management and authentication +- Resource allocation and management +- Platform-specific configuration settings +- Configuration serialization and caching + +The module follows a hierarchical structure where each platform implements these +abstract classes with their specific authentication methods, resource types, +and configuration parameters. All configurations support caching to avoid +repeated initialization and provide persistence across benchmark runs. + +Classes: + Credentials: Abstract base for platform authentication credentials + Resources: Abstract base for cloud resource management + Config: Abstract base for complete platform configuration + +The credentials initialization follows this precedence order: +1. Load credentials with values provided in config +2. Fall back to environment variables +3. Report failure if no credentials are available +""" + from __future__ import annotations from abc import ABC @@ -8,66 +35,117 @@ from sebs.cache import Cache from sebs.utils import has_platform, LoggingBase, LoggingHandlers -# FIXME: Replace type hints for static generators after migration to 3.7 -# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel -""" - Credentials for FaaS system used to authorize operations on functions - and other resources. - - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. -""" +class Credentials(ABC, LoggingBase): + """Abstract base class for FaaS platform authentication credentials. + This class defines the interface for managing authentication credentials + across different FaaS platforms. Each platform implementation provides + specific credential types (API keys, service account files, connection + strings, etc.) while following the common serialization and caching + patterns defined here. 
+ """ -class Credentials(ABC, LoggingBase): def __init__(self): + """Initialize the credentials base class with logging support.""" super().__init__() - """ - Create credentials instance from user config and cached values. - """ - @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Credentials": - pass + """Create credentials instance from user config and cached values. - """ - Serialize to JSON for storage in cache. - """ + This method implements the credential loading hierarchy: + 1. Use new config values if provided + 2. Load from environment variables + 3. Fail if no credentials available - @abstractmethod - def serialize(self) -> dict: + Credentials are NOT cached. + + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored credentials + handlers: Logging handlers for error reporting + + Returns: + Credentials: Platform-specific credentials instance + + Raises: + RuntimeError: If no valid credentials can be loaded + """ pass + @abstractmethod + def serialize(self) -> dict: + """Serialize credentials to dictionary for cache storage. -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. Examples might include IAM roles and API gateways - for HTTP triggers. + Returns: + dict: Serialized credential data suitable for JSON storage - Storage resources are handled seperately. -""" + Note: + Implementations should be careful about storing sensitive + information and may choose to exclude certain fields. + """ + pass class Resources(ABC, LoggingBase): + """Abstract base class for FaaS platform resource management. + + This class manages cloud resources allocated for function execution and + deployment across different FaaS platforms. Resources include infrastructure + components like IAM roles, API gateways, networking components, and storage + buckets needed to support serverless function deployment and execution. + + Storage resources (object storage, NoSQL databases) are handled separately + through dedicated storage classes, while this class focuses on compute + and deployment infrastructure. + + Key responsibilities: + - Resource ID management and generation + - Storage bucket lifecycle management + - Platform-specific resource provisioning + - Resource serialization and caching + - Resource cleanup and deallocation + """ + class StorageBucketType(str, Enum): + """Enumeration of storage bucket types used by SeBS. + + Different bucket types serve different purposes in the benchmarking workflow: + - DEPLOYMENT: Stores function deployment packages (ZIP files, containers) + - BENCHMARKS: Stores benchmark input data and test files + - EXPERIMENTS: Stores experiment results and output data + """ + DEPLOYMENT = "deployment" BENCHMARKS = "benchmarks" EXPERIMENTS = "experiments" @staticmethod - def deserialize(val: str) -> Resources.StorageBucketType: + def deserialize(val: str) -> "Resources.StorageBucketType": + """Deserialize a string value to a StorageBucketType enum. + + Args: + val: String value to convert to enum + + Returns: + StorageBucketType: Corresponding enum value + + Raises: + Exception: If the value doesn't match any enum member + """ for member in Resources.StorageBucketType: if member.value == val: return member raise Exception(f"Unknown storage bucket type type {val}") def __init__(self, name: str): + """Initialize the resources base class. 
+ + Args: + name: Platform name (e.g., 'aws', 'azure', 'gcp') + """ super().__init__() self._name = name self._buckets: Dict[Resources.StorageBucketType, str] = {} @@ -75,38 +153,99 @@ def __init__(self, name: str): @property def resources_id(self) -> str: + """Get the unique resource ID for this deployment. + + Returns: + str: Unique resource identifier + + Raises: + AssertionError: If no resource ID has been set + """ assert self._resources_id is not None return self._resources_id @resources_id.setter def resources_id(self, resources_id: str): + """Set the unique resource ID for this deployment. + + Args: + resources_id: Unique identifier for resource grouping + """ self._resources_id = resources_id @property def has_resources_id(self) -> bool: + """Check if a resource ID has been assigned. + + Returns: + bool: True if resource ID is set, False otherwise + """ return self._resources_id is not None @property def region(self) -> str: + """Get the cloud region for resource deployment. + + Returns: + str: Cloud region identifier + """ return self._region @region.setter def region(self, region: str): + """Set the cloud region for resource deployment. + + Args: + region: Cloud region identifier + """ self._region = region def get_storage_bucket(self, bucket_type: Resources.StorageBucketType) -> Optional[str]: + """Get the bucket name for a specific bucket type. + + Args: + bucket_type: Type of bucket to retrieve + + Returns: + Optional[str]: Bucket name if set, None otherwise + """ return self._buckets.get(bucket_type) def get_storage_bucket_name(self, bucket_type: Resources.StorageBucketType) -> str: + """Generate a standardized bucket name for a bucket type. + + Creates bucket names following the pattern: sebs-{type}-{resource_id} + + Args: + bucket_type: Type of bucket to name + + Returns: + str: Generated bucket name + """ return f"sebs-{bucket_type.value}-{self._resources_id}" def set_storage_bucket(self, bucket_type: Resources.StorageBucketType, bucket_name: str): + """Set the bucket name for a specific bucket type. + + Args: + bucket_type: Type of bucket to set + bucket_name: Name of the bucket + """ self._buckets[bucket_type] = bucket_name @staticmethod @abstractmethod - def initialize(res: Resources, dct: dict): + def initialize(res: "Resources", dct: dict): + """Initialize a Resources instance from configuration dictionary. + + This base implementation handles common resource initialization + including resource ID and storage bucket configuration. Platform-specific + implementations should call this method and add their own initialization. + Args: + res: Resources instance to initialize + dct: Configuration dictionary from cache or user config + """ if "resources_id" in dct: res._resources_id = dct["resources_id"] @@ -114,21 +253,31 @@ def initialize(res: Resources, dct: dict): for key, value in dct["storage_buckets"].items(): res._buckets[Resources.StorageBucketType.deserialize(key)] = value - """ - Create credentials instance from user config and cached values. - """ - @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": - pass + """Create resources instance from user config and cached values. - """ - Serialize to JSON for storage in cache. 
- """ + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored resources + handlers: Logging handlers for error reporting + + Returns: + Resources: Platform-specific resources instance + """ + pass @abstractmethod def serialize(self) -> dict: + """Serialize resources to dictionary for cache storage. + + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `resources_id` and `storage_buckets`. + + Returns: + dict: Serialized resource data including resource ID and bucket mappings + """ out = {} if self.has_resources_id: out["resources_id"] = self.resources_id @@ -137,6 +286,14 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): + """Update the cache with current resource configuration. + + Stores the resource ID and storage bucket mappings in the cache + for future retrieval. + + Args: + cache: Cache instance to update + """ if self.has_resources_id: cache.update_config( val=self.resources_id, keys=[self._name, "resources", "resources_id"] @@ -147,43 +304,95 @@ def update_cache(self, cache: Cache): ) -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" - - class Config(ABC, LoggingBase): + """Abstract base class for complete FaaS platform configuration. + + This class combines credentials and resources into a complete platform + configuration, along with platform-specific settings like region selection. + It provides the top-level configuration interface used throughout the + benchmarking framework. + + The Config class coordinates: + - Platform credentials for authentication + - Resource allocation and management + - Regional deployment settings + - Configuration persistence and caching + - Platform-specific parameter handling + """ _region: str def __init__(self, name: str): + """Initialize the configuration base class. + + Args: + name: Platform name (e.g., 'aws', 'azure', 'gcp') + """ super().__init__() self._region = "" self._name = name @property def region(self) -> str: + """Get the cloud region for deployment. + + Returns: + str: Cloud region identifier + """ return self._region @property @abstractmethod def credentials(self) -> Credentials: + """Get the platform credentials. + + Returns: + Credentials: Platform-specific credentials instance + """ pass @property @abstractmethod def resources(self) -> Resources: + """Get the platform resources. + + Returns: + Resources: Platform-specific resources instance + """ pass @staticmethod @abstractmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: "Config", dct: dict): + """Initialize a Config instance from configuration dictionary. + + Args: + cfg: Config instance to initialize + dct: Configuration dictionary + """ cfg._region = dct["region"] @staticmethod @abstractmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + """Create configuration instance from user config and cached values. + + This method serves as a factory for platform-specific configurations, + dynamically loading the appropriate implementation based on the platform + name specified in the configuration. To do that, it calls + the appropriate subclass's deserialize method. 
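+
+        A minimal user config passed here might look like (illustrative values):
+
+            {"name": "aws", "region": "us-east-1"}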
+ + Args: + config: User-provided configuration dictionary + cache: Cache instance for loading stored configuration + handlers: Logging handlers for error reporting + + Returns: + Config: Platform-specific configuration instance + + Raises: + AssertionError: If the platform type is unknown or unsupported + """ from sebs.local.config import LocalConfig name = config["name"] @@ -210,8 +419,21 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config @abstractmethod def serialize(self) -> dict: + """Serialize configuration to dictionary for cache storage. + + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `name` and `region`. + + Returns: + dict: Serialized configuration including platform name and region + """ return {"name": self._name, "region": self._region} @abstractmethod def update_cache(self, cache: Cache): + """Update the cache with current configuration settings. + + Args: + cache: Cache instance to update + """ cache.update_config(val=self.region, keys=[self._name, "region"]) diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7..ee881a52 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -1,3 +1,19 @@ +"""Docker container management for serverless function deployments. + +This module provides the DockerContainer class for building and managing +Docker containers for serverless function deployments. It handles: + +- Building benchmark Docker images for different platforms +- Cross-architecture container compilation with emulation +- Container registry operations (push/pull) +- Progress tracking for container operations +- Platform-specific container naming and tagging + +The module supports container-based deployments across different serverless +platforms, with automatic detection of the host architecture and appropriate +configuration for cross-compilation when needed. +""" + from abc import abstractmethod import docker import json @@ -13,25 +29,68 @@ class DockerContainer(LoggingBase): + """Abstract base class for Docker container management in serverless deployments. + + This class provides common functionality for building, pushing, and managing + Docker containers for serverless function deployments. Each platform + implementation (AWS, Azure, GCP, etc.) extends this class to provide + platform-specific container handling. + + Key features: + - Container image building with cross-architecture support + - Container registry operations (push/pull/inspect) + - Progress tracking for long-running operations + - Platform-specific image naming and tagging + - Caching and optimization for repeated builds + + Attributes: + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest inspection + system_config: SeBS configuration for image management + _disable_rich_output: Flag to disable rich progress output + """ + @staticmethod @abstractmethod def name() -> str: + """Get the platform name for this container implementation. + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp') + """ pass @property def disable_rich_output(self) -> bool: + """Get whether rich output is disabled. + + Returns: + bool: True if rich output is disabled, False otherwise + """ return self._disable_rich_output @disable_rich_output.setter def disable_rich_output(self, val: bool): + """Set whether to disable rich output. 
+ + Args: + val: True to disable rich output, False to enable + """ self._disable_rich_output = val def __init__( self, system_config: SeBSConfig, - docker_client, + docker_client: docker.client, experimental_manifest: bool = False, ): + """Initialize the Docker container manager. + + Args: + system_config: SeBS configuration for container management + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest features + """ super().__init__() self.docker_client = docker_client @@ -40,7 +99,18 @@ def __init__( self._disable_rich_output = False def find_image(self, repository_name, image_tag) -> bool: + """Check if a Docker image exists in the registry. + + Attempts to find an image in the registry using either experimental + manifest inspection (if enabled) or by attempting to pull the image. + Args: + repository_name: Name of the repository (e.g., 'my-repo/my-image') + image_tag: Tag of the image to find + + Returns: + bool: True if the image exists, False otherwise + """ if self.experimental_manifest: try: # This requires enabling experimental Docker features @@ -58,7 +128,20 @@ def find_image(self, repository_name, image_tag) -> bool: return False def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): + """Update progress display for Docker operations. + + Parses Docker API output and updates the rich progress display for + operations like image pushing. Tracks individual layer progress and + handles completion events. + + Args: + txt: Docker API output line (JSON string or dict) + progress: Rich progress instance to update + layer_tasks: Dictionary tracking progress tasks for each layer + Raises: + Exception: If an error is reported in the Docker output + """ if isinstance(txt, str): line = json.loads(txt) else: @@ -89,6 +172,20 @@ def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): raise Exception(line["error"]) def push_image(self, repository_uri, image_tag): + """Push a Docker image to a container registry. + + Pushes the specified image to the container registry with optional + progress tracking. Handles errors and provides informative logging + throughout the process. + + Args: + repository_uri: URI of the container registry repository + image_tag: Tag of the image to push + + Raises: + docker.errors.APIError: If the push operation fails + RuntimeError: If an error occurs during the push stream + """ try: if not self.disable_rich_output: @@ -124,6 +221,20 @@ def push_image(self, repository_uri, image_tag): def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """Generate registry name and image URI for a benchmark. + + Creates platform-specific naming for container images including + registry URL, repository name, image tag, and complete image URI. + + Args: + benchmark: Name of the benchmark (e.g., '110.dynamic-html') + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x64', 'arm64') + + Returns: + Tuple[str, str, str, str]: Registry name, repository name, image tag, full image URI + """ pass def build_base_image( @@ -135,15 +246,28 @@ def build_base_image( benchmark: str, is_cached: bool, ) -> Tuple[bool, str]: + """ - When building function for the first time (according to SeBS cache), - check if Docker image is available in the registry. - If yes, then skip building. 
-        If no, then continue building.
-
-        For every subsequent build, we rebuild image and push it to the
-        registry. These are triggered by users modifying code and enforcing
-        a build.
+        Build benchmark Docker image.
 
+        When building a function for the first time (according to the SeBS cache),
+        check if the Docker image is available in the registry.
+        If yes, then skip building.
+        If no, then continue building.
+
+        For every subsequent build, we rebuild the image and push it to the
+        registry. These are triggered by users modifying code and enforcing
+        a build.
+
+        Args:
+            directory: build directory
+            language_name: benchmark language
+            language_version: benchmark language version
+            architecture: CPU architecture
+            benchmark: benchmark name
+            is_cached: True if the image is currently cached
+
+        Returns:
+            Tuple[bool, str]: True if the image was rebuilt, and the image URI
         """
 
         registry_name, repository_name, image_tag, image_uri = self.registry_name(
diff --git a/sebs/faas/function.py b/sebs/faas/function.py
index 0fab7bcf..7fd4e314 100644
--- a/sebs/faas/function.py
+++ b/sebs/faas/function.py
@@ -1,3 +1,17 @@
+"""
+Function and execution model for the serverless benchmarking framework.
+
+This module defines the core abstractions for serverless functions, including:
+- Function class: Represents a deployed serverless function
+- Trigger class: Represents invocation mechanisms for functions
+- Runtime and FunctionConfig: Configuration parameters for functions
+- ExecutionResult and related classes: Data model for capturing measurements
+
+These abstractions provide a unified interface for handling functions across
+different FaaS platforms, allowing for consistent deployment, invocation,
+and measurement collection.
+"""
+
 from __future__ import annotations
 
 import json
@@ -12,12 +26,24 @@
 from sebs.benchmark import Benchmark
 from sebs.utils import LoggingBase
 
-"""
-    Times are reported in microseconds.
-"""
-
 
 class ExecutionTimes:
+    """
+    Client-side timing measurements for function execution.
+
+    Stores various timing measurements from the client's perspective,
+    including total execution time, HTTP connection times, and benchmark
+    runtime. All times are reported in microseconds unless otherwise specified.
+
+    Attributes:
+        client: Total client-side execution time in microseconds
+        client_begin: Timestamp when the request was initiated
+        client_end: Timestamp when the response was received
+        benchmark: Benchmark execution time in microseconds
+        initialization: Function initialization time in microseconds
+        http_startup: Time to establish HTTP connection in seconds
+        http_first_byte_return: Time to first byte in seconds
+    """
 
     client: int
     client_begin: datetime
@@ -28,94 +54,217 @@
     http_first_byte_return: int
 
     def __init__(self):
+        """Initialize with default values."""
         self.client = 0
         self.initialization = 0
         self.benchmark = 0
 
     @staticmethod
     def deserialize(cached_obj: dict) -> "ExecutionTimes":
+        """
+        Create an ExecutionTimes instance from a dictionary.
+
+        Args:
+            cached_obj: Dictionary containing serialized timing data
+
+        Returns:
+            ExecutionTimes: New instance with the deserialized data
+        """
         ret = ExecutionTimes()
         ret.__dict__.update(cached_obj)
         return ret
 
 
 class ProviderTimes:
+    """
+    Provider-reported timing measurements for function execution.
+
+    Stores timing measurements reported by the cloud provider,
+    including initialization time and execution time.
+ + Attributes: + initialization: Function initialization time in microseconds + execution: Function execution time in microseconds + """ initialization: int execution: int def __init__(self): + """Initialize with default values.""" self.execution = 0 self.initialization = 0 @staticmethod def deserialize(cached_obj: dict) -> "ProviderTimes": + """ + Create a ProviderTimes instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized timing data + + Returns: + ProviderTimes: New instance with the deserialized data + """ ret = ProviderTimes() ret.__dict__.update(cached_obj) return ret class ExecutionStats: + """ + Statistics for function execution. + + Tracks execution statistics such as memory usage, cold start status, + and execution failure. + + Attributes: + memory_used: Amount of memory used in MB (if available) + cold_start: Whether this was a cold start execution + failure: Whether the execution failed + """ memory_used: Optional[float] cold_start: bool failure: bool def __init__(self): + """Initialize with default values.""" self.memory_used = None self.cold_start = False self.failure = False @staticmethod def deserialize(cached_obj: dict) -> "ExecutionStats": + """ + Create an ExecutionStats instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized statistics + + Returns: + ExecutionStats: New instance with the deserialized data + """ ret = ExecutionStats() ret.__dict__.update(cached_obj) return ret class ExecutionBilling: + """ + Billing information for function execution. + + Tracks billing-related metrics such as allocated memory, + billed execution time, and GB-seconds consumed. + + Attributes: + memory: Allocated memory in MB + billed_time: Billed execution time in milliseconds + gb_seconds: GB-seconds consumed (memory/1024 * billed_time/1000) + """ _memory: Optional[int] _billed_time: Optional[int] _gb_seconds: int def __init__(self): + """Initialize with default values.""" self.memory = None self.billed_time = None self.gb_seconds = 0 @property def memory(self) -> Optional[int]: + """ + Get the allocated memory in MB. + + Returns: + int: Memory allocation in MB, or None if not available + """ return self._memory @memory.setter def memory(self, val: int): + """ + Set the allocated memory in MB. + + Args: + val: Memory allocation in MB + """ self._memory = val @property def billed_time(self) -> Optional[int]: + """ + Get the billed execution time in milliseconds. + + Returns: + int: Billed time in milliseconds, or None if not available + """ return self._billed_time @billed_time.setter def billed_time(self, val: int): + """ + Set the billed execution time in milliseconds. + + Args: + val: Billed time in milliseconds + """ self._billed_time = val @property def gb_seconds(self) -> int: + """ + Get the GB-seconds consumed. + + Returns: + int: GB-seconds consumed + """ return self._gb_seconds @gb_seconds.setter def gb_seconds(self, val: int): + """ + Set the GB-seconds consumed. + + Args: + val: GB-seconds consumed + """ self._gb_seconds = val @staticmethod def deserialize(cached_obj: dict) -> "ExecutionBilling": + """ + Create an ExecutionBilling instance from a dictionary. + + Args: + cached_obj: Dictionary containing serialized billing data + + Returns: + ExecutionBilling: New instance with the deserialized data + """ ret = ExecutionBilling() ret.__dict__.update(cached_obj) return ret class ExecutionResult: + """ + Comprehensive result of a function execution. 
+ + This class captures all timing information, provider metrics, and function + output from a single function invocation. It provides methods for parsing + benchmark output and calculating metrics. + + Attributes: + output: Dictionary containing function output + request_id: Unique identifier for the request + times: ExecutionTimes containing client-side timing measurements + provider_times: ProviderTimes containing platform-reported timings + stats: ExecutionStats containing resource usage statistics + billing: ExecutionBilling containing cost-related information + """ output: dict request_id: str @@ -125,6 +274,7 @@ class ExecutionResult: billing: ExecutionBilling def __init__(self): + """Initialize with default values for all components.""" self.output = {} self.request_id = "" self.times = ExecutionTimes() @@ -134,6 +284,16 @@ def __init__(self): @staticmethod def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": + """ + Create an ExecutionResult with client-side timing information. + + Args: + client_time_begin: Timestamp when the request was initiated + client_time_end: Timestamp when the response was received + + Returns: + ExecutionResult: New instance with calculated client-side timing + """ ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end @@ -141,6 +301,17 @@ def from_times(client_time_begin: datetime, client_time_end: datetime) -> "Execu return ret def parse_benchmark_output(self, output: dict): + """ + Parse the output from a benchmark execution. + + Extracts timing information and cold start status from the benchmark output. + + Args: + output: Dictionary containing benchmark output + + Raises: + RuntimeError: If the invocation failed (missing required fields) + """ self.output = output # FIXME: temporary handling of errorenous invocation if "is_cold" not in self.output: @@ -156,6 +327,15 @@ def parse_benchmark_output(self, output: dict): @staticmethod def deserialize(cached_config: dict) -> "ExecutionResult": + """ + Create an ExecutionResult instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized execution result + + Returns: + ExecutionResult: New instance with the deserialized data + """ ret = ExecutionResult() ret.times = ExecutionTimes.deserialize(cached_config["times"]) ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) @@ -166,28 +346,68 @@ def deserialize(cached_config: dict) -> "ExecutionResult": return ret -""" - Function trigger and implementation of invocation. +class Trigger(ABC, LoggingBase): + """ + Abstract base class for function triggers. - FIXME: implement a generic HTTP invocation and specialize input and output - processing in classes. -""" + A trigger represents a mechanism for invoking a serverless function, + such as HTTP requests, direct SDK invocations, or event-based triggers. + Each trigger type implements synchronous and asynchronous invocation methods. + Includes a helper method for HTTP invocations using pycurl. + """ -class Trigger(ABC, LoggingBase): class TriggerType(Enum): + """ + Enumeration of supported trigger types. + + Defines the different mechanisms for invoking serverless functions: + - HTTP: Invocation via HTTP requests + - LIBRARY: Invocation via cloud provider SDK + - STORAGE: Invocation via storage events + """ + HTTP = "http" LIBRARY = "library" STORAGE = "storage" @staticmethod def get(name: str) -> "Trigger.TriggerType": + """ + Get a TriggerType by name (case-insensitive). 
+ + Args: + name: Name of the trigger type + + Returns: + TriggerType: The matching trigger type + + Raises: + Exception: If no matching trigger type is found + """ for member in Trigger.TriggerType: if member.value.lower() == name.lower(): return member - raise Exception("Unknown trigger type {}".format(member)) + raise Exception("Unknown trigger type {}".format(name)) def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: + """ + Invoke a function via HTTP request. + + Makes a HTTP POST request using pycurl to the given URL, with the provided payload, + and processes the response into an ExecutionResult. + + Args: + payload: Dictionary containing the function input + url: URL to invoke the function + verify_ssl: Whether to verify SSL certificates + + Returns: + ExecutionResult: Result of the function execution + + Raises: + RuntimeError: If the invocation fails or produces invalid output + """ import pycurl from io import BytesIO @@ -236,69 +456,172 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec self.logging.error("No output provided!") raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") - # FIXME: 3.7+, future annotations @staticmethod @abstractmethod def trigger_type() -> "Trigger.TriggerType": + """ + Get the type of this trigger. + + Returns: + TriggerType: The type of this trigger + """ pass @abstractmethod def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke a function with the given payload. + + Args: + payload: Dictionary containing the function input + + Returns: + ExecutionResult: Result of the function execution + """ pass @abstractmethod def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke a function with the given payload. + + Args: + payload: Dictionary containing the function input + + Returns: + Future: Future object representing the pending execution + """ pass @abstractmethod def serialize(self) -> dict: + """ + Serialize the trigger to a dictionary. + + Returns: + dict: Dictionary representation of the trigger + """ pass @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Trigger": + """ + Create a Trigger instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized trigger + + Returns: + Trigger: New instance with the deserialized data + """ pass class Language(Enum): + """ + Enumeration of supported programming languages. + + Currently supports Python and Node.js for serverless functions. + """ + PYTHON = "python" NODEJS = "nodejs" - # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(val: str) -> Language: + """ + Get a Language by string value. + + Args: + val: String representation of the language + + Returns: + Language: The matching language enum + + Raises: + Exception: If no matching language is found + """ for member in Language: if member.value == val: return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): + """ + Enumeration of supported CPU architectures. + + Defines the CPU architectures that can be targeted for function deployment. + """ + X86 = "x64" ARM = "arm64" def serialize(self) -> str: + """ + Serialize the architecture to a string. 
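+
+        For example, `Architecture.X86.serialize()` returns `"x64"`.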
+ + Returns: + str: String representation of the architecture + """ return self.value @staticmethod def deserialize(val: str) -> Architecture: + """ + Get an Architecture by string value. + + Args: + val: String representation of the architecture + + Returns: + Architecture: The matching architecture enum + + Raises: + Exception: If no matching architecture is found + """ for member in Architecture: if member.value == val: return member - raise Exception(f"Unknown architecture type {member}") + raise Exception(f"Unknown architecture type {val}") @dataclass class Runtime: + """ + Runtime configuration for a serverless function. + + Defines the language and version for a function's runtime environment. + + Attributes: + language: Programming language (Python, Node.js) + version: Version string of the language runtime + """ language: Language version: str def serialize(self) -> dict: + """ + Serialize the runtime to a dictionary. + + Returns: + dict: Dictionary representation of the runtime + """ return {"language": self.language.value, "version": self.version} @staticmethod def deserialize(config: dict) -> Runtime: + """ + Create a Runtime instance from a dictionary. + + Args: + config: Dictionary containing serialized runtime + + Returns: + Runtime: New instance with the deserialized data + """ languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} return Runtime(language=languages[config["language"]], version=config["version"]) @@ -308,6 +631,18 @@ def deserialize(config: dict) -> Runtime: @dataclass class FunctionConfig: + """ + Configuration for a serverless function. + + Defines the resources, runtime, and architecture for a function deployment. + + Attributes: + timeout: Maximum execution time in seconds + memory: Memory allocation in MB + runtime: Runtime environment configuration + architecture: CPU architecture for deployment + """ + timeout: int memory: int runtime: Runtime @@ -315,6 +650,16 @@ class FunctionConfig: @staticmethod def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: + """ + Create a FunctionConfig subclass instance from a benchmark. + + Args: + benchmark: Benchmark to extract configuration from + obj_type: Type of FunctionConfig to create + + Returns: + T: New instance of the specified FunctionConfig subclass + """ runtime = Runtime(language=benchmark.language, version=benchmark.language_version) architecture = Architecture.deserialize(benchmark._experiment_config._architecture) cfg = obj_type( @@ -327,28 +672,74 @@ def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: @staticmethod def from_benchmark(benchmark: Benchmark) -> FunctionConfig: + """ + Create a FunctionConfig instance from a benchmark. + + Args: + benchmark: Benchmark to extract configuration from + + Returns: + FunctionConfig: New instance with the benchmark's configuration + """ return FunctionConfig._from_benchmark(benchmark, FunctionConfig) @staticmethod def deserialize(data: dict) -> FunctionConfig: + """ + Create a FunctionConfig instance from a dictionary. + + Args: + data: Dictionary containing serialized function configuration + + Returns: + FunctionConfig: New instance with the deserialized data + """ keys = list(FunctionConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} data["runtime"] = Runtime.deserialize(data["runtime"]) return FunctionConfig(**data) def serialize(self) -> dict: - return self.__dict__ - + """ + Serialize the function configuration to a dictionary. -""" - Abstraction base class for FaaS function. 
Contains a list of associated triggers - and might implement non-trigger execution if supported by the SDK. - Example: direct function invocation through AWS boto3 SDK. -""" + Returns: + dict: Dictionary representation of the function configuration + """ + return self.__dict__ class Function(LoggingBase): + """ + Abstract base class for serverless functions. + + This class represents a deployed serverless function with its configuration + and contains a list of associated triggers. + Each cloud provider (AWS, Azure, GCP, etc.) implements a subclass with + platform-specific functionality. + + Represents a deployable unit of code on a FaaS platform. Contains details + about the benchmark it belongs to, its name, code hash, configuration, + and associated triggers. Subclasses implement provider-specific details. + + Attributes: + config: Function configuration + name: Name of the deployed function + benchmark: Name of the benchmark implemented by this function + code_package_hash: Hash of the deployed code package + updated_code: Whether the code has been updated since deployment + """ + def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): + """ + Initialize a Function instance. + + Args: + benchmark: Name of the benchmark + name: Name of the function + code_hash: Hash of the code package + cfg: Function configuration + """ super().__init__() self._benchmark = benchmark self._name = name @@ -359,48 +750,117 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi @property def config(self) -> FunctionConfig: + """ + Get the function configuration. + + Returns: + FunctionConfig: Configuration of the function + """ return self._cfg @property - def name(self): + def name(self) -> str: + """ + Get the name of the function. + + Returns: + str: Name of the function + """ return self._name @property - def benchmark(self): + def benchmark(self) -> str: + """ + Get the name of the benchmark. + + Returns: + str: Name of the benchmark + """ return self._benchmark @property - def code_package_hash(self): + def code_package_hash(self) -> str: + """ + Get the hash of the code package. + + Returns: + str: Hash of the code package + """ return self._code_package_hash @code_package_hash.setter def code_package_hash(self, new_hash: str): + """ + Set the hash of the code package. + + Args: + new_hash: New hash of the code package + """ self._code_package_hash = new_hash @property def updated_code(self) -> bool: + """ + Check if the code has been updated since deployment. + + Returns: + bool: True if the code has been updated, False otherwise + """ return self._updated_code @updated_code.setter def updated_code(self, val: bool): + """ + Set whether the code has been updated since deployment. + + Args: + val: True if the code has been updated, False otherwise + """ self._updated_code = val def triggers_all(self) -> List[Trigger]: + """ + Get all triggers associated with this function. + + Returns: + List[Trigger]: List of all triggers + """ return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: + """ + Get triggers of a specific type associated with this function. + + Args: + trigger_type: Type of triggers to get + + Returns: + List[Trigger]: List of triggers of the specified type + """ try: return self._triggers[trigger_type] except KeyError: return [] def add_trigger(self, trigger: Trigger): + """ + Add a trigger to this function. 
+ + Args: + trigger: Trigger to add + """ if trigger.trigger_type() not in self._triggers: self._triggers[trigger.trigger_type()] = [trigger] else: self._triggers[trigger.trigger_type()].append(trigger) def serialize(self) -> dict: + """ + Serialize the function to a dictionary. + + Returns: + dict: Dictionary representation of the function + """ return { "name": self._name, "hash": self._code_package_hash, @@ -414,4 +874,13 @@ def serialize(self) -> dict: @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Function": + """ + Create a Function instance from a cached configuration. + + Args: + cached_config: Dictionary containing serialized function + + Returns: + Function: New instance with the deserialized data + """ pass diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 16f9ab11..43945f54 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -1,3 +1,12 @@ +""" +Module for NoSQL database storage abstraction in the Serverless Benchmarking Suite. + +This module provides an abstract base class for NoSQL database implementations +across different cloud platforms (AWS DynamoDB, Azure CosmosDB, Google Cloud Datastore) +and local development environments. It handles table creation, data writing, and +cache management for benchmark data stored in NoSQL databases. +""" + from abc import ABC from abc import abstractmethod from typing import Dict, Optional, Tuple @@ -8,20 +17,59 @@ class NoSQLStorage(ABC, LoggingBase): + """ + Abstract base class for NoSQL database storage implementations. + + This class defines the interface for NoSQL database operations across different + cloud platforms and local environments. Concrete implementations handle the + platform-specific details of creating tables, writing data, and managing + resources. + + Attributes: + cache_client: Client for caching database information + region: Cloud region where the database is deployed + """ + @staticmethod @abstractmethod def deployment_name() -> str: + """ + Get the name of the deployment platform. + + Returns: + str: Name of the deployment platform (e.g., 'aws', 'azure', 'gcp') + """ pass @property def cache_client(self) -> Cache: + """ + Get the cache client. + + Returns: + Cache: The cache client for database information + """ return self._cache_client @property - def region(self): + def region(self) -> str: + """ + Get the cloud region. + + Returns: + str: The cloud region where the database is deployed + """ return self._region def __init__(self, region: str, cache_client: Cache, resources: Resources): + """ + Initialize a NoSQL storage instance. + + Args: + region: Cloud region where the database is deployed + cache_client: Client for caching database information + resources: Resource configuration for the database + """ super().__init__() self._cache_client = cache_client self._cached = False @@ -30,40 +78,94 @@ def __init__(self, region: str, cache_client: Cache, resources: Resources): @abstractmethod def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get a mapping of benchmark-defined table names to actual cloud provider table names. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Dictionary mapping table logical names to physical table names + """ pass @abstractmethod def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the physical table name for a benchmark's logical table. 
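+
+        For example (illustrative names), the logical table `cart` of benchmark
+        `130.crud-api` could resolve to a physical table such as
+        `sebs-benchmarks-abc123-130.crud-api-cart`.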
+ + Args: + benchmark: Name of the benchmark + table: Logical name of the table + + Returns: + Optional[str]: Physical table name if it exists, None otherwise + """ pass @abstractmethod def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve cached table information for a benchmark. + Implementations should populate internal structures with cached table names/details. + + Args: + benchmark: Name of the benchmark + + Returns: + bool: True if cache was successfully retrieved, False otherwise + """ pass @abstractmethod def update_cache(self, benchmark: str): + """ + Update the cache with the latest table information for a benchmark. + + Args: + benchmark: Name of the benchmark + """ pass def envs(self) -> dict: - return {} + """ + Return a dictionary of environment variables that are required by functions + to access this NoSQL storage (e.g., connection strings, table names). + Default implementation returns an empty dictionary. Subclasses should override + if they need to expose environment variables. - """ - Each table name follow this pattern: - sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} - - Each implementation should do the following - (1) Retrieve cached data - (2) Create missing table that do not exist - (3) Update cached data if anything new was created -> this is done separately - in benchmark.py once the data is uploaded by the benchmark. - """ + Returns: + dict: Dictionary of environment variables + """ + return {} def create_benchmark_tables( - self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + self, + benchmark: str, + name: str, + primary_key: str, + secondary_key: Optional[str] = None, ): + """ + Checks if the table already exists in the cache. If not, creates a new table + with the specified keys. - if self.retrieve_cache(benchmark): + Each table name follows this pattern: + sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} + + Each implementation should do the following: + 1. Retrieve cached data + 2. Create missing tables that do not exist + 3. Update cached data if anything new was created (done separately + in benchmark.py once the data is uploaded by the benchmark) + Args: + benchmark: Name of the benchmark + name: Logical name of the table + primary_key: Primary key field name + secondary_key: Optional secondary key field name + """ + if self.retrieve_cache(benchmark): table_name = self._get_table_name(benchmark, name) if table_name is not None: self.logging.info( @@ -72,20 +174,33 @@ def create_benchmark_tables( return self.logging.info(f"Preparing to create a NoSQL table {name} for benchmark {benchmark}") - self.create_table(benchmark, name, primary_key, secondary_key) - """ - - AWS: DynamoDB Table - Azure: CosmosDB Container - Google Cloud: Firestore in Datastore Mode, Database - """ - @abstractmethod def create_table( - self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + self, + benchmark: str, + name: str, + primary_key: str, + secondary_key: Optional[str] = None, ) -> str: + """ + Create a new table for a benchmark. 
+
+        Provider-specific implementation details:
+        - AWS: DynamoDB Table
+        - Azure: CosmosDB Container
+        - Google Cloud: Firestore in Datastore Mode, Database/Collection
+
+        Args:
+            benchmark: Name of the benchmark
+            name: Logical name of the table
+            primary_key: Primary key field name
+            secondary_key: Optional secondary key field name
+
+        Returns:
+            str: Physical name of the created table
+        """
         pass
 
     @abstractmethod
@@ -97,22 +212,49 @@ def write_to_table(
         primary_key: Tuple[str, str],
         secondary_key: Optional[Tuple[str, str]] = None,
     ):
-        pass
+        """
+        Write an item/document to the specified table/container.
+        This is used by benchmarks to populate tables with test data.
 
-    """
+        Args:
 
-        AWS DynamoDB: Removing & recreating table is the cheapest & fastest option
+            benchmark: Name of the benchmark
+            table: Logical name of the table
+            data: Dictionary of data to write
+            primary_key: Tuple of (key_name, key_value) for the primary key
+            secondary_key: Optional tuple of (key_name, key_value) for the secondary key
+        """
+        pass
 
-        Azure CosmosDB: recreate container
+    @abstractmethod
+    def clear_table(self, name: str) -> str:
+        """
+        Clear all items from a table/container.
+        Currently not implemented for any of the providers.
 
-        Google Cloud: also likely recreate
+        Provider-specific implementation details:
+        - AWS DynamoDB: Removing & recreating the table looks like the cheapest & fastest option.
+        - Azure CosmosDB: Recreate container or use specific API to delete items.
+        - Google Cloud: Likely recreate collection or use specific API.
 
-    """
+        Args:
+            name: Name of the table to clear
 
-    @abstractmethod
-    def clear_table(self, name: str) -> str:
+        Returns:
+            str: Result message or status
+        """
         pass
 
     @abstractmethod
     def remove_table(self, name: str) -> str:
+        """
+        Remove a table completely.
+
+        Args:
+            name: Name of the table to remove
+
+        Returns:
+            str: Result message or status
+        """
         pass
diff --git a/sebs/faas/resources.py b/sebs/faas/resources.py
index 140a719e..21f5c877 100644
--- a/sebs/faas/resources.py
+++ b/sebs/faas/resources.py
@@ -1,3 +1,14 @@
+"""System resource management for FaaS platforms.
+
+This module provides the abstract base class for managing system-level resources
+across different serverless platforms. It coordinates access to storage services,
+NoSQL databases, and other cloud resources needed for benchmark execution.
+
+Each platform (AWS, Azure, GCP, Local, etc.) provides a concrete implementation
+that handles platform-specific resource management while following the common
+interface defined here.
+"""
+
 from abc import abstractmethod, ABC
 from typing import Optional
 
@@ -11,33 +22,66 @@
 
 
 class SystemResources(ABC, LoggingBase):
+    """Abstract base class for system-level resource management.
+
+    This class provides a common interface for managing cloud resources needed
+    by benchmark functions across different serverless platforms. It handles the
+    provisioning and access to storage services, NoSQL databases, and other
+    platform-specific resources.
+
+    Attributes:
+        _config: Platform configuration containing credentials and settings
+        _cache_client: Cache client for storing resource configurations
+        _docker_client: Docker client for container-based resource management
+    """
+
     def __init__(self, config: Config, cache_client: Cache, docker_client: docker.client):
+        """Initialize the system resources manager.
+ Args: + config: Platform configuration with credentials and settings + cache_client: Cache client for configuration persistence + docker_client: Docker client for container management + """ super().__init__() self._config = config self._cache_client = cache_client self._docker_client = docker_client - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. - - :param replace_existing: replace benchmark input data if exists already - """ - @abstractmethod def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: - pass + """Get or create a persistent storage instance. - """ - Access instance of NoSQL storage. - It might be a remote and truly persistent service (AWS DynamoDB, Azure CosmosDB..), - or a dynamically allocated local instance (ScyllaDB). + Provides access to object storage services (S3, Azure Blob, GCS, MinIO) + for storing benchmark input data, function packages, and results. The + storage instance may be a cloud service or a locally deployed container. - """ + Args: + replace_existing: Whether to replace existing benchmark data. + If None, uses the default behavior for the platform. + + Returns: + PersistentStorage: Configured storage instance ready for use + + Raises: + RuntimeError: If storage service cannot be provisioned or accessed + """ + pass @abstractmethod def get_nosql_storage(self) -> NoSQLStorage: + """Get or create a NoSQL database storage instance. + + Provides access to NoSQL database services (DynamoDB, CosmosDB, + Datastore, ScyllaDB) for benchmarks that require structured data + storage with key-value or document-based operations. + The storage instance may be a cloud service or a locally deployed container. + + Returns: + NoSQLStorage: Configured NoSQL storage instance ready for use + + Raises: + RuntimeError: If NoSQL service cannot be provisioned or accessed + """ pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 5b93c053..b698f585 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -1,3 +1,20 @@ +"""Object storage abstraction for serverless benchmarks. + +This module provides the PersistentStorage abstract base class for managing +object storage across different cloud platforms and local deployments. It +handles bucket management, file operations, and benchmark data organization. + +The storage abstraction supports: +- Cross-platform object storage (S3, Azure Blob, GCS, MinIO) +- Benchmark data organization with input/output separation +- Bucket lifecycle management and naming conventions +- Benchmark files upload/download operations with caching +- Deployment discovery and resource management + +Each platform provides concrete implementations that handle platform-specific +API calls while following the common interface defined here. +""" + import os import re @@ -9,36 +26,82 @@ from sebs.cache import Cache from sebs.utils import LoggingBase -""" - Abstract class -""" - class PersistentStorage(ABC, LoggingBase): + """Abstract base class for persistent object storage implementations. + + This class defines the interface for object storage services across different + cloud platforms. It manages buckets, files, and benchmark data organization + while providing a consistent API regardless of the underlying storage service. 
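+
+    A typical usage sketch (illustrative; `system_resources` stands for a
+    platform-specific SystemResources instance and the key layout is an example):
+
+        storage = system_resources.get_storage(replace_existing=False)
+        bucket = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS)
+        storage.upload(bucket, "input.json", "110.dynamic-html-0-input/input.json")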
+ + Attributes: + cached: Whether bucket configuration is cached + _cache_client: Cache client for storing configuration + _input_prefixes: List of input data prefixes for benchmarks + _output_prefixes: List of output data prefixes for benchmarks + input_prefixes_files: Files associated with input prefixes + _replace_existing: Whether to replace existing files during uploads + _region: Cloud region for storage operations + _cloud_resources: Resource configuration for the platform + """ + @staticmethod @abstractmethod def deployment_name() -> str: + """Return the name of the FaaS deployment this storage belongs to (e.g., "aws", "azure"). + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp', 'minio') + """ pass @property def cache_client(self) -> Cache: + """Get the cache client for configuration storage. + + Returns: + Cache: Cache client instance + """ return self._cache_client @property - def replace_existing(self): + def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files during operations. + + Returns: + bool: True if existing files should be replaced, False otherwise + """ return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set flag indicating whether to replace existing files during operations. + + Args: + val: True to replace existing files, False to skip + """ self._replace_existing = val @property - def region(self): + def region(self) -> str: + """Get the cloud region for storage operations. + + Returns: + str: Cloud region identifier + """ return self._region def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool ): + """Initialize the persistent storage instance. + + Args: + region: Cloud region for storage operations + cache_client: Cache client for configuration persistence + resources: Resource configuration for the platform + replace_existing: Whether to replace existing files during uploads + """ super().__init__() self._cache_client = cache_client self.cached = False @@ -51,18 +114,51 @@ def __init__( @property def input_prefixes(self) -> List[str]: + """Get the list of input data prefixes for benchmarks. + These are paths within the benchmark data bucket. + + Returns: + List[str]: List of input prefix names + """ return self._input_prefixes @property def output_prefixes(self) -> List[str]: + """Get the list of output data prefixes for benchmarks. + These are paths within the benchmark data bucket. + + Returns: + List[str]: List of output prefix names + """ return self._output_prefixes @abstractmethod def correct_name(self, name: str) -> str: + """Correct a bucket name to comply with platform naming requirements. + + Different platforms have different naming restrictions (character sets, + length limits, etc.). This method applies platform-specific corrections. + + Args: + name: Original bucket name + + Returns: + str: Corrected bucket name that complies with platform requirements + """ pass def find_deployments(self) -> List[str]: + """Find existing SeBS deployments by scanning bucket names. + + Scans all buckets in the storage service and extracts deployment IDs + from bucket names that follow the SeBS naming convention. This helps + identify existing deployments that can be reused. + + Looks for buckets named "sebs-benchmarks-*". 
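+        For example, a bucket named "sebs-benchmarks-abc123" (with an illustrative
+        resource ID) corresponds to the deployment ID "abc123".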
+ Returns: + List[str]: List of deployment resource IDs found in bucket names + """ deployments = [] buckets = self.list_buckets() for bucket in buckets: @@ -75,86 +171,155 @@ def find_deployments(self) -> List[str]: @abstractmethod def _create_bucket( - self, name: str, buckets: List[str] = [], randomize_name: bool = False + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False ) -> str: - pass + """Create a new storage bucket with platform-specific implementation. - """ - Download a file from a bucket. + Args: + name: Desired bucket name + buckets: Optional list of existing buckets to check against + randomize_name: Whether to add random suffix for uniqueness - :param bucket_name: - :param key: storage source filepath - :param filepath: local destination filepath - """ + Returns: + str: Name of the created bucket + + Raises: + Platform-specific exceptions for bucket creation failures + """ + pass @abstractmethod def download(self, bucket_name: str, key: str, filepath: str) -> None: - pass + """Download a file from a storage bucket. - """ - Upload a file to a bucket with by passing caching. - Useful for uploading code package to storage (when required). + Args: + bucket_name: Name of the source bucket + key: Storage source filepath (object key) + filepath: Local destination filepath - :param bucket_name: - :param filepath: local source filepath - :param key: storage destination filepath - """ + Raises: + Platform-specific exceptions for download failures + """ + pass @abstractmethod - def upload(self, bucket_name: str, filepath: str, key: str): - pass + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to a storage bucket. - """ - Retrieves list of files in a bucket. + Bypasses caching and directly uploads the file. Useful for uploading + code packages to storage when required by the deployment platform. - :param bucket_name: - :return: list of files in a given bucket - """ + Args: + bucket_name: Name of the destination bucket + filepath: Local source filepath + key: Storage destination filepath (object key) + + Raises: + Platform-specific exceptions for upload failures + """ + pass @abstractmethod def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """Retrieve list of files in a storage bucket. + + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of file keys in the bucket matching the prefix + + Raises: + Platform-specific exceptions for listing failures + """ pass @abstractmethod def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List all storage buckets/containers, optionally filtering + them with a prefix. + + Args: + bucket_name: Optional specific bucket prefix name to check for + + Returns: + List[str]: List of bucket names. If bucket_name is provided, + returns [bucket_name] if it exists, empty list otherwise. + + Raises: + Platform-specific exceptions for listing failures + """ pass @abstractmethod def exists_bucket(self, bucket_name: str) -> bool: + """Check if a storage bucket/container exists. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if bucket exists, False otherwise + + Raises: + Platform-specific exceptions for access failures + """ pass @abstractmethod - def clean_bucket(self, bucket_name: str): + def clean_bucket(self, bucket_name: str) -> None: + """Remove all objects from a storage bucket. 
+ + Args: + bucket_name: Name of the bucket to clean + + Raises: + Platform-specific exceptions for deletion failures + """ pass @abstractmethod - def remove_bucket(self, bucket: str): - pass + def remove_bucket(self, bucket: str) -> None: + """Delete a storage bucket completely. + The bucket must often be emptied afterwards. - """ - Allocate a set of input/output buckets for the benchmark. - The routine checks the cache first to verify that buckets have not - been allocated first. + Args: + bucket: Name of the bucket to remove - :param benchmark: benchmark name - :param buckets: number of input and number of output buckets - """ + Raises: + Platform-specific exceptions for deletion failures + """ + pass def benchmark_data( self, benchmark: str, requested_buckets: Tuple[int, int] ) -> Tuple[List[str], List[str]]: + """Allocate storage prefixes for benchmark input and output data. + Creates logical prefixes within the benchmarks bucket for organizing + benchmark input and output data. Checks cache first to avoid redundant + allocation and validates existing prefix configuration. + + Prefix naming format: + - Input: "benchmark-{idx}-input" + - Output: "benchmark-{idx}-output" + + Args: + benchmark: Name of the benchmark + requested_buckets: Tuple of (input_prefix_count, output_prefix_count) + + Returns: + Tuple[List[str], List[str]]: Lists of (input_prefixes, output_prefixes) """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-input - """ + + # Add input prefixes inside benchmarks bucket + # Prefix format: name-idx-input for i in range(0, requested_buckets[0]): self.input_prefixes.append("{}-{}-input".format(benchmark, i)) - """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-output - """ + # Add output prefixes inside benchmarks bucket + # Prefix format: name-idx-output for i in range(0, requested_buckets[1]): self.output_prefixes.append("{}-{}-output".format(benchmark, i)) @@ -163,20 +328,20 @@ def benchmark_data( if cached_storage is not None: - cached_storage = cached_storage["buckets"] + cached_buckets = cached_storage["buckets"] # verify the input is up to date for prefix in self.input_prefixes: - if prefix not in cached_storage["input"]: + if prefix not in cached_buckets["input"]: self.cached = False for prefix in self.output_prefixes: - if prefix not in cached_storage["output"]: + if prefix not in cached_buckets["output"]: self.cached = False else: self.cached = False - if self.cached is True and cached_storage["input_uploaded"] is False: + if cached_storage is not None and cached_storage["input_uploaded"] is False: self.cached = False # query buckets if the input prefixes changed, or the input is not up to date. 
@@ -204,54 +369,24 @@ def benchmark_data( return self.input_prefixes, self.output_prefixes - # def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): - - # benchmarks_bucket = self.benchmarks_bucket() - - # Load cached information - # cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) - # if cached_buckets: - # cache_valid = True - # for bucket in [ - # *cached_buckets["buckets"]["input"], - # *cached_buckets["buckets"]["output"], - # ]: - # if not self.exists_bucket(bucket): - # cache_valid = False - # self.logging.info(f"Cached storage buckets {bucket} does not exist.") - # break - - # if cache_valid: - # self.input_buckets = cached_buckets["buckets"]["input"] - # for bucket in self.input_buckets: - # self.input_buckets_files.append(self.list_bucket(bucket)) - # self.output_buckets = cached_buckets["buckets"]["output"] - # # for bucket in self.output_buckets: - # # self.clean_bucket(bucket) - # self.cached = True - # self.logging.info( - # "Using cached storage input buckets {}".format(self.input_buckets) - # ) - # self.logging.info( - # "Using cached storage output buckets {}".format(self.output_buckets) - # ) - # return - # else: - # self.logging.info("Cached storage buckets are no longer valid, creating new ones.") - - # buckets = self.list_buckets(self.correct_name(benchmark)) - # for i in range(0, requested_buckets[0]): - # self.input_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-input".format(benchmark, i)), buckets) - # ) - # self.input_buckets_files.append(self.list_bucket(self.input_buckets[-1])) - # for i in range(0, requested_buckets[1]): - # self.output_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-output".format(benchmark, i)), buckets) - # ) - # self.save_storage(benchmark) - def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: + """Get or create a storage bucket for the specified type. + + + Checks if the bucket is already known in `_cloud_resources`. If not, + generates a bucket name following the standard naming convention, + checks if it exists in the cloud, creates it + if necessary, and then stores it in `_cloud_resources`. + + Args: + bucket_type: Type of bucket to retrieve (BENCHMARKS, EXPERIMENTS, DEPLOYMENT) + + Returns: + str: Name of the bucket for the specified type + + Raises: + Platform-specific exceptions for bucket operations + """ bucket = self._cloud_resources.get_storage_bucket(bucket_type) if bucket is None: @@ -276,30 +411,46 @@ def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: return bucket - """ - Implements a handy routine for uploading input data by benchmarks. - It should skip uploading existing files unless storage client has been - initialized to override existing data. - - :param bucket_idx: index of input bucket - :param file: name of file to upload - :param filepath: filepath in the storage - """ - @abstractmethod def uploader_func(self, bucket_idx: int, file: str, filepath: str) -> None: + """Upload benchmark input data to storage with smart caching. + + Implements a utility function for uploading benchmark input data that + respects caching preferences. Skips uploading existing files unless + the storage client has been configured to override existing data. + + This is used by each benchmark to prepare input benchmark files. 
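+
+ Example (a minimal sketch; prefix and file names are illustrative and follow
+ the argument semantics documented below):
+
+ # upload one input file into the first input prefix; skipped if it
+ # already exists and replace_existing is False
+ storage.uploader_func(0, "image.jpg", "benchmark-0-input/image.jpg")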
+ + Args: + bucket_idx: Index of the input prefix/bucket + file: Name of the file to upload + filepath: Storage destination filepath (object key) + + Raises: + Platform-specific exceptions for upload failures + """ pass - """ - Download all files in a storage bucket. - Warning: assumes flat directory in a bucket! Does not handle bucket files - with directory marks in a name, e.g. 'dir1/dir2/file' - """ + def download_bucket(self, bucket_name: str, output_dir: str) -> None: + """Download all files from a storage bucket to a local directory. + + Downloads every file from the specified bucket to a local output directory. + Only downloads files that don't already exist locally. - def download_bucket(self, bucket_name: str, output_dir: str): + Warning: + Assumes flat directory structure in bucket. Does not handle object + keys with directory separators (e.g., 'dir1/dir2/file'). + + Args: + bucket_name: Name of the bucket to download from + output_dir: Local directory to download files to + + Raises: + Platform-specific exceptions for download failures + """ files = self.list_bucket(bucket_name) - for f in files: - output_file = os.path.join(output_dir, f) + for file_key in files: + output_file = os.path.join(output_dir, file_key) if not os.path.exists(output_file): - self.download(bucket_name, f, output_file) + self.download(bucket_name, file_key, output_file) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 9fbe0e27..53dcbe25 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -1,3 +1,13 @@ +""" +Module providing the core abstraction for Function-as-a-Service (FaaS) systems. + +This module defines the base System class that provides consistent interfaces for +working with different serverless platforms (AWS Lambda, Azure Functions, Google Cloud +Functions, OpenWhisk, etc.). It handles function lifecycle management, code packaging, +deployment, triggering, and metrics collection while abstracting away platform-specific +details. +""" + from abc import ABC from abc import abstractmethod from random import randrange @@ -15,16 +25,33 @@ from sebs.utils import LoggingBase from .config import Config -""" - This class provides basic abstractions for the FaaS system. - It provides the interface for initialization of the system and storage - services, creation and update of serverless functions and querying - logging and measurements services to obtain error messages and performance - measurements. -""" - class System(ABC, LoggingBase): + """ + Abstract base class for FaaS system implementations. + + This class provides basic abstractions for all supported FaaS platforms. + It defines the interface for system initialization, resource management, + function deployment, code packaging, function invocation, and metrics collection. + Each cloud provider implements a concrete subclass of this abstract base. 
+ + The class handles: + - System and storage service initialization + - Creation and updating of serverless functions + - Function code packaging and deployment + - Trigger creation and management + - Metrics collection and error handling + - Caching of functions to avoid redundant deployments + - Cold start management + + Attributes: + system_config: Global SeBS configuration + docker_client: Docker client for building code packages and containers + cache_client: Cache client for storing function and deployment information + cold_start_counter: Counter for generating unique function names to force cold starts + system_resources: Resources manager for the specific cloud platform + """ + def __init__( self, system_config: SeBSConfig, @@ -32,60 +59,134 @@ def __init__( docker_client: docker.client, system_resources: SystemResources, ): + """ + Initialize a FaaS system implementation. + + Args: + system_config: Global SeBS configuration settings + cache_client: Cache client for storing function and deployment information + docker_client: Docker client for building code packages and containers + system_resources: Resources manager for the specific cloud platform + """ super().__init__() self._system_config = system_config self._docker_client = docker_client self._cache_client = cache_client + # Initialize with random value to help with cold start detection/forcing self._cold_start_counter = randrange(100) - self._system_resources = system_resources @property def system_config(self) -> SeBSConfig: + """ + Get the global SeBS configuration. + + Returns: + SeBSConfig: The system configuration + """ return self._system_config @property def docker_client(self) -> docker.client: + """ + Get the Docker client. + + Returns: + docker.client: The Docker client + """ return self._docker_client @property def cache_client(self) -> Cache: + """ + Get the cache client. + + Returns: + Cache: The cache client + """ return self._cache_client @property def cold_start_counter(self) -> int: + """ + Get the cold start counter. + + A counter used in attempts to enforce cold starts. + Its value might be incorporated into function environment variables. + + Returns: + int: The current cold start counter value + """ return self._cold_start_counter @cold_start_counter.setter def cold_start_counter(self, val: int): + """ + Set the cold start counter. + + Args: + val: The new counter value + """ self._cold_start_counter = val @property @abstractmethod def config(self) -> Config: + """ + Get the platform-specific configuration. + + Returns: + Config: The platform-specific configuration + """ pass @property def system_resources(self) -> SystemResources: + """ + Get the platform-specific resources manager. + + Returns: + SystemResources: The resources manager + """ return self._system_resources @staticmethod @abstractmethod def function_type() -> "Type[Function]": + """ + Get the platform-specific Function class type. + + Returns: + Type[Function]: The Function class for this platform + """ pass def find_deployments(self) -> List[str]: - - """ - Default implementation that uses storage buckets. - data storage accounts. - This can be overriden, e.g., in Azure that looks for unique """ + Find existing deployments in the cloud platform. + + Default implementation uses storage buckets to identify deployments. + This can be overridden by platform-specific implementations, e.g., + Azure that looks for unique storage accounts. 
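+
+ Example (a minimal sketch; the returned IDs are illustrative):
+
+ ids = system.find_deployments() # e.g. ["a1b2c3d4", "f9e8d7c6"]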
+ Returns: + List[str]: List of existing deployment resource IDs + """ return self.system_resources.get_storage().find_deployments() def initialize_resources(self, select_prefix: Optional[str]): + """ + Initialize cloud resources for the deployment. + This method either: + 1. Uses an existing resource ID from configuration + 2. Finds an existing deployment in the cloud and reuses it, matching the optional prefix + 3. If no suitable existing deployment is found or specified, + a new unique resource ID is generated. + + Args: + select_prefix: Optional prefix to match when looking for existing deployments + """ # User provided resources or found in cache if self.config.resources.has_resources_id: self.logging.info( @@ -98,7 +199,6 @@ def initialize_resources(self, select_prefix: Optional[str]): # If a prefix is specified, we find the first matching resource ID if select_prefix is not None: - for dep in deployments: if select_prefix in dep: self.logging.info( @@ -117,7 +217,7 @@ def initialize_resources(self, select_prefix: Optional[str]): ) self.logging.warning("Deployment resource IDs in the cloud: " f"{deployments}") - # create + # Create a new unique resource ID res_id = "" if select_prefix is not None: res_id = f"{select_prefix}-{str(uuid.uuid1())[0:8]}" @@ -125,30 +225,46 @@ def initialize_resources(self, select_prefix: Optional[str]): res_id = str(uuid.uuid1())[0:8] self.config.resources.resources_id = res_id self.logging.info(f"Generating unique resource name {res_id}") - # ensure that the bucket is created - this allocates the new resource + + # Ensure that the bucket is created - this allocates the new resource self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize the system. - :param config: systems-specific parameters - """ + After this call completes, the local or remote FaaS system should be ready + to allocate functions, manage storage resources, and invoke functions. + Subclasses should override this to perform provider-specific initialization. - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + Args: + config: System-specific parameters + resource_prefix: Optional prefix for resource naming + """ pass - """ - Apply the system-specific code packaging routine to build benchmark. + @abstractmethod + def package_code( + self, + directory: str, + language_name: str, + language_version: str, + architecture: str, + benchmark: str, + is_cached: bool, + container_deployment: bool, + ) -> Tuple[str, int, str]: + """ + Apply system-specific code packaging to prepare a deployment package. + The benchmark creates a code directory with the following structure: - [benchmark sources] - - [benchmark resources] + - [benchmark resources], e.g., HTML template or ffmpeg binary - [dependence specification], e.g. requirements.txt or package.json - - [handlers implementation for the language and deployment] + - [language-specific wrapper implementations for the specific system] - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific + This step transforms that structure to fit platform-specific deployment + requirements, such as creating a zip file for AWS or a container image.
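+
+ Example (a minimal call sketch; argument values are illustrative):
+
+ archive, size, uri = system.package_code(
+ "/tmp/code", "python", "3.9", "x64", "110.dynamic-html",
+ is_cached=False, container_deployment=False,
+ )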
Args: directory: Path to the code directory @@ -162,21 +278,9 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] Returns: Tuple containing: - Path to packaged code - - Size of the package - - Container URI - """ - - @abstractmethod - def package_code( - self, - directory: str, - language_name: str, - language_version: str, - architecture: str, - benchmark: str, - is_cached: bool, - container_deployment: bool, - ) -> Tuple[str, int, str]: + - Size of the package in bytes + - Container URI (if container deployment, otherwise empty string) + """ pass @abstractmethod @@ -209,6 +313,17 @@ def create_function( @abstractmethod def cached_function(self, function: Function): + """ + Perform any necessary operations for a cached function. + + This method is called when a function is found in the cache. It may perform + platform-specific operations such as checking if the function still exists + in the cloud, updating permissions, re-initializing transient client objects, + or ensuring associated resources (like triggers) are correctly configured. + + Args: + function: The cached function instance + """ pass @abstractmethod @@ -220,7 +335,7 @@ def update_function( container_uri: str, ): """ - Update an existing function in the FaaS platform. + Update an existing function in the FaaS platform with new code and/or configuration. Args: function: Existing function instance to update @@ -233,21 +348,32 @@ def update_function( """ pass - """ - a) if a cached function with given name is present and code has not changed, - then just return function name - b) if a cached function is present and the cloud code has a different - code version, then upload new code - c) if no cached function is present, then create code package and - either create new function or update an existing but uncached one + def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: + """ + Get or create a function for a benchmark. - Benchmark rebuild is requested but will be skipped if source code is - not changed and user didn't request update. + This method handles the complete function creation/update workflow: - """ + 1. If a cached function with the given name exists and code has not changed, + returns the cached function (after potential configuration checks/updates). + 2. If a cached function exists but the code hash differs or a rebuild is forced, + updates the function code in the cloud. + 3. If no cached function exists, creates a new function. - def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: + Benchmark code is built (via `code_package.build`) before these steps. + The build might be skipped if source code hasn't changed and no update is forced.
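+
+ Example (a minimal sketch; code_package is assumed to be an existing Benchmark instance):
+
+ function = system.get_function(code_package)
+ trigger = system.create_trigger(function, Trigger.TriggerType.HTTP)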
+ + Args: + code_package: The benchmark containing the function code + func_name: Optional name for the function (will be generated if not provided) + + Returns: + Function: The function instance + Raises: + Exception: If the language version is not supported by this platform + """ + # Verify language version compatibility if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name, code_package.architecture ): @@ -259,35 +385,32 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) ) + # Generate function name if not provided if not func_name: func_name = self.default_function_name(code_package) + + # Build the code package rebuilt, _, container_deployment, container_uri = code_package.build(self.package_code) - """ - There's no function with that name? - a) yes -> create new function. Implementation might check if a function - with that name already exists in the cloud and update its code. - b) no -> retrieve function from the cache. Function code in cloud will - be updated if the local version is different. - """ + # Check if function exists in cache functions = code_package.functions - is_function_cached = not (not functions or func_name not in functions) + if is_function_cached: - # retrieve function + # Retrieve function from cache cached_function = functions[func_name] code_location = code_package.code_location try: function = self.function_type().deserialize(cached_function) except RuntimeError as e: - self.logging.error( f"Cached function {cached_function['name']} is no longer available." ) self.logging.error(e) is_function_cached = False + # Create new function if not cached or deserialize failed if not is_function_cached: msg = ( "function name not provided." @@ -307,13 +430,14 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) code_package.query_cache() return function else: - + # Handle existing function assert function is not None self.cached_function(function) self.logging.info( "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) ) - # is the function up-to-date? + + # Check if code needs to be updated if function.code_package_hash != code_package.hash or rebuilt: if function.code_package_hash != code_package.hash: self.logging.info( @@ -324,9 +448,11 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) if rebuilt: self.logging.info( - f"Enforcing rebuild and update of of cached function " + f"Enforcing rebuild and update of cached function " f"{func_name} with hash {function.code_package_hash}." 
) + + # Update function code self.update_function(function, code_package, container_deployment, container_uri) function.code_package_hash = code_package.hash function.updated_code = True @@ -337,28 +463,48 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) function=function, ) code_package.query_cache() - # code up to date, but configuration needs to be updated - # FIXME: detect change in function config + + # Check if configuration needs to be updated elif self.is_configuration_changed(function, code_package): self.update_function_configuration(function, code_package) self.cache_client.update_function(function) code_package.query_cache() else: self.logging.info(f"Cached function {func_name} is up to date.") + return function @abstractmethod def update_function_configuration(self, cached_function: Function, benchmark: Benchmark): - pass + """ + Update the configuration of an existing function on the FaaS platform. - """ - This function checks for common function parameters to verify if their value is - still up to date. - """ + This method is called when a function's code is up-to-date but its + configuration (memory, timeout, environment variables, etc.) needs to be updated. + + Args: + cached_function: The function to update + benchmark: The benchmark containing the new configuration + """ + pass def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + """ + Check if a function's configuration needs to be updated. + + This function checks for common function parameters to verify if their + values are still up to date with the benchmark configuration. + + Args: + cached_function: The existing function + benchmark: The benchmark with potential new configuration + + Returns: + bool: True if configuration has changed, False otherwise + """ changed = False + + # Check common configuration attributes for attr in ["timeout", "memory"]: new_val = getattr(benchmark.benchmark_config, attr) old_val = getattr(cached_function.config, attr) @@ -370,6 +516,7 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma changed = True setattr(cached_function.config, attr, new_val) + # Check language/runtime attributes for lang_attr in [["language"] * 2, ["language_version", "version"]]: new_val = getattr(benchmark, lang_attr[0]) old_val = getattr(cached_function.config.runtime, lang_attr[1]) @@ -389,10 +536,31 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate a default function name for a benchmark. + + Args: + code_package: The benchmark to generate a name for + resources: Optional resources configuration + + Returns: + str: Generated function name + """ pass @abstractmethod def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Force cold starts for the specified functions. + + This method implements platform-specific techniques to ensure that + subsequent invocations of the functions will be cold starts. + In practice, this usually updates environment variables with new values. + + Args: + functions: List of functions to enforce cold starts for + code_package: The benchmark associated with the functions + """ pass @abstractmethod @@ -404,26 +572,54 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: dict, ): + """ + Download provider-specific performance metrics from the cloud platform.
+ + This typically involves querying a logging or monitoring service (e.g., CloudWatch, + Application Insights) for details like actual execution duration, memory usage, etc., + and populating the `requests` (ExecutionResult objects) and `metrics` dictionaries. + + Args: + function_name: Name of the function to get metrics for + start_time: Start timestamp for metrics collection + end_time: End timestamp for metrics collection + requests: Dictionary of execution results + metrics: Dictionary to store the downloaded metrics + """ pass @abstractmethod def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: - pass + """ + Create a trigger for a function. - def disable_rich_output(self): + Args: + function: The function to create a trigger for + trigger_type: Type of trigger to create + + Returns: + Trigger: The created trigger + """ pass - # @abstractmethod - # def get_invocation_error(self, function_name: str, - # start_time: int, end_time: int): - # pass + def disable_rich_output(self): + """ + Disable rich output for platforms that support it, e.g, progress of pushing Docker images. - """ - Shutdown local FaaS instances, connections and clients. - """ + This is mostly used in testing environments or CI pipelines. + """ + pass @abstractmethod def shutdown(self) -> None: + """ + Shutdown the FaaS system. + + This should release any acquired resources, stop any running local services + (like Docker containers started by SeBS for CLI interactions), and update + the cache with the final system configuration. + This should be called when the system is no longer needed. + """ try: self.cache_client.lock() self.config.update_cache(self.cache_client) @@ -433,4 +629,10 @@ def shutdown(self) -> None: @staticmethod @abstractmethod def name() -> str: + """ + Get the name of the platform. + + Returns: + str: Platform name (e.g., 'aws', 'azure', 'gcp') + """ pass diff --git a/sebs/gcp/__init__.py b/sebs/gcp/__init__.py index f76e7c75..ad3f3f69 100644 --- a/sebs/gcp/__init__.py +++ b/sebs/gcp/__init__.py @@ -1,3 +1,44 @@ +"""Google Cloud Platform (GCP) integration for SeBS. + +This package provides comprehensive Google Cloud Platform support, +including Cloud Functions deployment, Cloud Storage +for object storage, Firestore/Datastore for NoSQL operations, and Cloud Monitoring +for performance metrics collection. 
+ +The package includes: +- Function deployment and management via Cloud Functions API +- Object storage through Google Cloud Storage buckets +- NoSQL database operations using Firestore in Datastore mode +- Performance monitoring via Cloud Monitoring and Cloud Logging +- Docker-based gcloud CLI integration for administrative operations +- Credential and resource management + +Modules: + gcp: Main GCP system implementation + config: Configuration and credential management + storage: Cloud Storage integration + function: Cloud Function representation + triggers: Function invocation triggers + datastore: Firestore/Datastore NoSQL implementation + resources: System resource management + cli: gcloud CLI integration + +Example: + Basic GCP system setup: + + from sebs.gcp import GCP, GCPConfig + + # Configure GCP with credentials + config = GCPConfig.deserialize(config_dict, cache, handlers) + + # Initialize GCP system + gcp_system = GCP(system_config, config, cache, docker_client, handlers) + gcp_system.initialize() + + # Deploy a function + function = gcp_system.create_function(benchmark, "my-function", False, "") +""" + from .gcp import GCP # noqa from .config import GCPConfig # noqa from .storage import GCPStorage # noqa diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index 65ca33bc..a38b43d0 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -1,3 +1,21 @@ +"""Google Cloud CLI integration for SeBS. + +This module provides a Docker-based Google Cloud CLI interface. +Currently, we use it mostly to allocate and manage Datastore accounts. +There's no API or Python library for that. + +Classes: + GCloudCLI: Docker-based gcloud CLI interface for GCP operations + +Example: + Using the gcloud CLI interface: + + cli = GCloudCLI(credentials, system_config, docker_client) + cli.login(project_name) + result = cli.execute("gcloud functions list") + cli.shutdown() +""" + import logging import os @@ -9,13 +27,46 @@ class GCloudCLI(LoggingBase): + """Docker-based Google Cloud CLI interface. + + Provides a containerized environment for executing gcloud commands with + proper authentication and project configuration. Uses a Docker container + with the gcloud CLI pre-installed and configured. + + Attributes: + docker_instance: Running Docker container with gcloud CLI + """ + @staticmethod def typename() -> str: + """Get the type name for this CLI implementation. + + Returns: + Type name string for GCP CLI + """ return "GCP.CLI" def __init__( self, credentials: GCPCredentials, system_config: SeBSConfig, docker_client: docker.client - ): + ) -> None: + """Initialize the gcloud CLI Docker container. + + Sets up a Docker container with the gcloud CLI, pulling the image if needed + and mounting the GCP credentials file for authentication. + + Initialize GCloudCLI and start the Docker container. + Pulls the gcloud CLI Docker image if not found locally, then runs a + container in detached mode with credentials mounted. + + + Args: + credentials: GCP credentials with service account file path + system_config: SeBS system configuration + docker_client: Docker client for container management + + Raises: + RuntimeError: If Docker image pull fails + """ super().__init__() @@ -48,20 +99,19 @@ def __init__( tty=True, ) self.logging.info(f"Started gcloud CLI container: {self.docker_instance.id}.") - # while True: - # try: - # dkg = self.docker_instance.logs(stream=True, follow=True) - # next(dkg).decode("utf-8") - # break - # except StopIteration: - # pass - """ - Execute the given command in Azure CLI. 
- Throws an exception on failure (commands are expected to execute succesfully). - """ + def execute(self, cmd: str) -> bytes: + """Execute a command in the gcloud CLI container. + + Args: + cmd: Command string to execute in the container - def execute(self, cmd: str): + Returns: + Command output as bytes + + Raises: + RuntimeError: If the command fails (non-zero exit code) + """ exit_code, out = self.docker_instance.exec_run(cmd) if exit_code != 0: raise RuntimeError( @@ -71,27 +121,33 @@ def execute(self, cmd: str): ) return out - """ - Run gcloud auth command on Docker instance. - - Important: we cannot run "init" as this always requires authenticating through a browser. - Instead, we authenticate as a service account. - - Setting cloud project will show a warning about missing permissions - for Cloud Resource Manager API: I don't know why, we don't seem to need it. - - Because of that, it will ask for verification to continue - which we do by passing "Y". - """ - - def login(self, project_name: str): + def login(self, project_name: str) -> None: + """Authenticate gcloud CLI and set the active project. + + Authenticates using the mounted credentials file (`/credentials.json` in + the container) and then sets the active Google Cloud project. + Automatically confirms any prompts that may appear during project setup. + Important: + - `gcloud init` is not used as it requires browser-based authentication. + Instead, we authenticate as a service account. + - Setting the project might show warnings about Cloud Resource Manager API + permissions, which are generally not needed for SeBS operations. + + Args: + project_name: GCP project ID to set as active + + Note: + Uses service account authentication instead of browser-based auth. + May show warnings about Cloud Resource Manager API permissions. + """ self.execute("gcloud auth login --cred-file=/credentials.json") self.execute(f"/bin/bash -c 'gcloud config set project {project_name} <<< Y'") self.logging.info("gcloud CLI login succesful") - """ - Shuts down the Docker instance. - """ + def shutdown(self) -> None: + """Shutdown the gcloud CLI Docker container. - def shutdown(self): + Stops and removes the Docker container used for gcloud operations. + """ self.logging.info("Stopping gcloud CLI manage Docker instance") self.docker_instance.stop() diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c4..8035ef39 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,28 +1,60 @@ +"""Configuration classes for Google Cloud Platform (GCP) integration. + +This module provides configuration classes for GCP, +including credentials management, resource allocation, and cloud region configuration. +It handles authentication through service account JSON files and manages project-specific +settings required for Cloud Functions deployment and execution. 
+ +Classes: + GCPCredentials: Handles authentication and project identification + GCPResources: Manages allocated cloud resources + GCPConfig: Main configuration container for GCP deployment + +Example: + Basic GCP configuration setup: + + credentials = GCPCredentials("/path/to/service-account.json") + resources = GCPResources() + config = GCPConfig(credentials, resources) +""" + import json import os -from typing import cast, List, Optional, Tuple +from typing import cast, Dict, List, Optional, Tuple from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources from sebs.utils import LoggingHandlers -# FIXME: Replace type hints for static generators after migration to 3.7 -# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel -""" - Credentials for FaaS system used to authorize operations on functions - and other resources. - - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. -""" +class GCPCredentials(Credentials): + """Credentials manager for Google Cloud Platform authentication. + Handles authentication to GCP services using service account JSON files. + Automatically extracts project ID from credentials and manages environment + variable setup for Google Cloud SDK authentication. -class GCPCredentials(Credentials): - def __init__(self, gcp_credentials: str): + The class supports multiple credential sources in priority order: + 1. User-provided credentials file path + 2. GOOGLE_APPLICATION_CREDENTIALS environment variable + 3. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + + Attributes: + _gcp_credentials: Path to the service account JSON file + _project_id: GCP project ID extracted from credentials + """ + + def __init__(self, gcp_credentials: str) -> None: + """Initialize GCP credentials with service account file. + + Args: + gcp_credentials: Path to the GCP service account JSON file + + Raises: + FileNotFoundError: If the credentials file doesn't exist + json.JSONDecodeError: If the credentials file is not valid JSON + KeyError: If the credentials file doesn't contain project_id + """ super().__init__() self._gcp_credentials = gcp_credentials @@ -32,18 +64,58 @@ def __init__(self, gcp_credentials: str): @property def gcp_credentials(self) -> str: + """Get the path to the GCP service account credentials file. + + Returns: + Path to the service account JSON file + """ return self._gcp_credentials @property def project_name(self) -> str: + """Get the GCP project ID from the credentials. + + Returns: + The GCP project ID string + """ return self._project_id @staticmethod def initialize(gcp_credentials: str) -> "GCPCredentials": + """Create a new GCPCredentials instance. + + Args: + gcp_credentials: Path to the GCP service account JSON file + + Returns: + A new GCPCredentials instance + """ return GCPCredentials(gcp_credentials) @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """Deserialize GCP credentials from configuration and cache. + + Loads credentials from multiple sources in priority order: + 1. User-provided config with credentials-json path + 2. 
GOOGLE_APPLICATION_CREDENTIALS environment variable + 3. GCP_SECRET_APPLICATION_CREDENTIALS environment variable + + Sets the `GOOGLE_APPLICATION_CREDENTIALS` environment variable if credentials + are loaded from SeBS config or SeBS-specific environment variables. + + Args: + config: Configuration dictionary potentially containing credentials + cache: Cache instance for storing/retrieving credentials + handlers: Logging handlers for error reporting + + Returns: + Initialized GCPCredentials instance + + Raises: + RuntimeError: If no valid credentials are found or if project ID + mismatch occurs between cache and new credentials + """ cached_config = cache.get_config("gcp") ret: GCPCredentials @@ -84,46 +156,79 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret - """ - Serialize to JSON for storage in cache. - """ + def serialize(self) -> Dict: + """Serialize credentials to dictionary for cache storage. + + Only stores the project_id, as the path to credentials might change or be + environment-dependent. It also avoids any potential security issues. - def serialize(self) -> dict: + Returns: + Dictionary containing project_id for cache storage + """ out = {"project_id": self._project_id} return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current GCP project id. + + Args: + cache: Cache instance to update with project ID + """ cache.update_config(val=self._project_id, keys=["gcp", "credentials", "project_id"]) -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. Examples might include IAM roles and API gateways - for HTTP triggers. +class GCPResources(Resources): + """Resource manager for serverless resources on Google Cloud Platform. - Storage resources are handled seperately. -""" + Currently, this class primarily inherits functionality from the base `Resources` + class, as we do not need more GCP-specific resources beyond standard storage buckets. + Attributes: + Inherits all attributes from the base Resources class + """ -class GCPResources(Resources): - def __init__(self): + def __init__(self) -> None: + """Initialize GCP resources manager.""" super().__init__(name="gcp") @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: Dict) -> "GCPResources": + """Initialize GCP resources from a dictionary configuration. + + Args: + res: Base Resources instance to initialize + dct: Dictionary containing resource configuration + + Returns: + Initialized GCPResources instance + """ ret = cast(GCPResources, res) super(GCPResources, GCPResources).initialize(ret, dct) return ret - """ - Serialize to JSON for storage in cache. - """ + def serialize(self) -> Dict: + """Serialize resources to dictionary for cache storage. - def serialize(self) -> dict: + Returns: + Dictionary representation of resources for cache storage + """ return super().serialize() @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + """Deserialize GCP resources from configuration and cache. + + Loads resources from cache if available, otherwise initializes from + user configuration or creates empty resource set. 
+ + Args: + config: Configuration dictionary potentially containing resources + cache: Cache instance for storing/retrieving resources + handlers: Logging handlers for status reporting + + Returns: + Initialized GCPResources instance + """ cached_config = cache.get_config("gcp") ret = GCPResources() @@ -144,43 +249,97 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the cache with current resource information. + + Args: + cache: Cache instance to update with resource data + """ super().update_cache(cache) -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" +class GCPConfig(Config): + """Main configuration class for Google Cloud Platform deployment. + Combines credentials and resources into a complete configuration for + GCP serverless function deployment. Manages cloud region settings, + authentication, and resource allocation for the benchmarking suite. -class GCPConfig(Config): + This class handles serialization/deserialization for cache persistence + and provides validation for configuration consistency across sessions. + + Attributes: + _project_name: GCP project identifier + _region: GCP region for resource deployment + _credentials: GCP authentication credentials + _resources: Allocated GCP resources + """ _project_name: str - def __init__(self, credentials: GCPCredentials, resources: GCPResources): + def __init__(self, credentials: GCPCredentials, resources: GCPResources) -> None: + """Initialize GCP configuration with credentials and resources. + + Args: + credentials: GCP authentication credentials + resources: GCP resource allocation settings + """ super().__init__(name="gcp") self._credentials = credentials self._resources = resources @property def region(self) -> str: + """Get the GCP region for resource deployment. + + Returns: + GCP region identifier (e.g., 'us-central1') + """ return self._region @property def project_name(self) -> str: + """Get the GCP project name from credentials. + + Returns: + GCP project identifier string + """ return self.credentials.project_name @property def credentials(self) -> GCPCredentials: + """Get the GCP credentials instance. + + Returns: + GCP authentication credentials + """ return self._credentials @property def resources(self) -> GCPResources: + """Get the GCP resources instance. + + Returns: + GCP resource allocation settings + """ return self._resources @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + def deserialize(config: Dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + """Deserialize GCP configuration from dictionary and cache. + + Loads complete GCP configuration including credentials and resources. + Validates consistency between cached and provided configuration values, + updating cache with new user-provided values when they differ. 
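+
+ Example (mirrors the module-level example; config_dict, cache, and handlers
+ are assumed to exist):
+
+ config = GCPConfig.deserialize(config_dict, cache, handlers)
+ print(config.region, config.project_name)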
+ + Args: + config: Configuration dictionary with GCP settings + cache: Cache instance for storing/retrieving configuration + handlers: Logging handlers for status reporting + + Returns: + Initialized GCPConfig instance + """ cached_config = cache.get_config("gcp") credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) @@ -195,7 +354,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi GCPConfig.initialize(config_obj, config) # mypy makes a mistake here - updated_keys: List[Tuple[str, Tuple[str]]] = [["region", ["gcp", "region"]]] # type: ignore + updated_keys: List[Tuple[str, List[str]]] = [("region", ["gcp", "region"])] # type: ignore # for each attribute here, check if its version is different than the one provided by # user; if yes, then update the value for config_key, keys in updated_keys: @@ -213,11 +372,23 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi return config_obj @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: Dict) -> None: + """Initialize GCP configuration from dictionary. + + Args: + cfg: Config instance to initialize (will be cast to GCPConfig) + dct: Dictionary containing configuration values including region + """ config = cast(GCPConfig, cfg) config._region = dct["region"] - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize configuration to dictionary for cache storage. + + Returns: + Dictionary containing complete GCP configuration including + name, region, credentials, and resources + """ out = { "name": "gcp", "region": self._region, @@ -226,7 +397,14 @@ def serialize(self) -> dict: } return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current configuration values. + + Updates region, credentials, and resources in the cache. + + Args: + cache: Cache instance to update with configuration data + """ cache.update_config(val=self.region, keys=["gcp", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index ae747fb1..f91a5998 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -1,3 +1,23 @@ +"""Google Cloud Datastore/Firestore implementation for SeBS NoSQL storage. + +This module provides NoSQL database functionality using Google Cloud Firestore +in Datastore mode. It manages database allocation, table creation, and data +operations for benchmarks requiring NoSQL storage capabilities. +To create databases, we use the gcloud CLI instance since there is no API +that we could access directly. + +Classes: + BenchmarkResources: Resource configuration for benchmark databases + Datastore: NoSQL storage implementation using Google Cloud Firestore + +Example: + Using Datastore for benchmark NoSQL operations: + + datastore = Datastore(cli_instance, cache, resources, region) + table_name = datastore.create_table("benchmark-name", "user-data", "user_id") + datastore.write_to_table("benchmark-name", table_name, data, primary_key, secondary_key) +""" + from dataclasses import dataclass from typing import Dict, List, Tuple, Optional @@ -11,32 +31,85 @@ @dataclass class BenchmarkResources: + """Resource configuration for a benchmark's Datastore database. 
+ + Tracks the allocated database name, table kinds, and client instance + for a specific benchmark's NoSQL storage requirements. + + Attributes: + database: Name of the Firestore database in Datastore mode + kinds: List of entity kinds (table equivalents) in the database + database_client: Optional Datastore client instance (allocated dynamically) + """ database: str kinds: List[str] # We allocate this dynamically - ignore when caching database_client: Optional[datastore.Client] = None - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize benchmark resources for cache storage. + + Returns: + Dictionary containing database name and kinds list + """ return {"database": self.database, "kinds": self.kinds} @staticmethod - def deserialize(config: dict) -> "BenchmarkResources": + def deserialize(config: Dict) -> "BenchmarkResources": + """Deserialize benchmark resources from cached configuration. + + Args: + config: Dictionary containing cached resource configuration + + Returns: + BenchmarkResources instance with database and kinds + """ return BenchmarkResources(database=config["database"], kinds=config["kinds"]) class Datastore(NoSQLStorage): + """Google Cloud Firestore/Datastore implementation for NoSQL storage. + + Provides NoSQL database functionality using Google Cloud Firestore in + Datastore mode. Manages database allocation, entity kind creation, and + data operations for benchmarks requiring NoSQL capabilities. + + Attributes: + _cli_instance: gcloud CLI interface for database management + _region: GCP region for database allocation + _benchmark_resources: Mapping of benchmarks to their database resources + """ + @staticmethod def typename() -> str: + """Get the type name for this NoSQL storage implementation. + + Returns: + Type name string for GCP Datastore + """ return "GCP.Datastore" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this NoSQL storage implementation. + + Returns: + Deployment name string 'gcp' + """ return "gcp" def __init__( self, cli_instance: GCloudCLI, cache_client: Cache, resources: Resources, region: str - ): + ) -> None: + """Initialize Datastore NoSQL storage manager. + + Args: + cli_instance: gcloud CLI interface for database operations + cache_client: Cache instance for storing resource state + resources: Resource configuration + region: GCP region for database allocation + """ super().__init__(region, cache_client, resources) self._cli_instance = cli_instance self._region = region @@ -44,14 +117,33 @@ def __init__( # Mapping: benchmark -> Datastore database self._benchmark_resources: Dict[str, BenchmarkResources] = {} - """ - GCP requires no table mappings: the name of "kind" is the same as benchmark name. - """ - def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get table name mappings for a benchmark. + + GCP Datastore requires no table mappings as the entity kind name + is the same as the benchmark table name. + + Args: + benchmark: Name of the benchmark + + Returns: + Empty dictionary (no mappings needed for GCP) + """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual table name for a benchmark table. + + In Datastore's case, the table alias is the kind name if it's registered + for the benchmark. 
+ + Args: + benchmark: Name of the benchmark + table: Logical table name + + Returns: + Table name if it exists in benchmark resources, None otherwise + """ if benchmark not in self._benchmark_resources: return None @@ -62,6 +154,14 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve benchmark resources from cache. + + Args: + benchmark: Name of the benchmark to retrieve resources for + + Returns: + True if resources were found in cache, False otherwise + """ if benchmark in self._benchmark_resources: return True @@ -73,23 +173,48 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update cache with current benchmark resources. + + Args: + benchmark: Name of the benchmark to cache resources for + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def benchmark_database(self, benchmark: str) -> str: + """Get the database name for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Database name for the benchmark's NoSQL resources + """ return self._benchmark_resources[benchmark].database def write_to_table( self, benchmark: str, table: str, - data: dict, + data: Dict, primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): + ) -> None: + """Write data to a Datastore entity kind (table). + + Args: + benchmark: Name of the benchmark + table: Name of the table (entity kind) + data: Dictionary of data to write + primary_key: Primary key tuple (name, value) + secondary_key: Secondary key tuple (name, value) - required for GCP + + Raises: + AssertionError: If secondary_key is None (required for GCP) + """ res = self._benchmark_resources[benchmark] table_name = self._get_table_name(benchmark, table) @@ -117,6 +242,26 @@ def write_to_table( def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: + """Create a new entity kind (table) in Datastore. + + Creates a new Firestore database in Datastore mode if needed using gloud CLI. + Datastore kinds are schemaless and created implicitly when an entity of that + kind is first written. This method primarily ensures the database exists and + registers the kind name for the benchmark. The `primary_key` is noted but + not directly used to create schema for the kind itself, as Datastore is schemaless. + + Args: + benchmark: Name of the benchmark + name: Name of the entity kind (table) to create + primary_key: Primary key field name + _: Unused parameter for compatibility + + Returns: + Name of the created entity kind + + Raises: + RuntimeError: If database operations fail + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) @@ -174,7 +319,29 @@ def create_table( return name def clear_table(self, name: str) -> str: + """Clear all entities from a table. + + Args: + name: Name of the table to clear + + Returns: + Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table from the database. 
+ + Args: + name: Name of the table to remove + + Returns: + Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 6736c1ca..d4e2eedd 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -1,4 +1,20 @@ -from typing import cast, Optional +"""Google Cloud Platform function implementation for SeBS. + +This module provides the GCPFunction class that represents a Cloud Function +deployed on Google Cloud Platform. It handles function metadata, serialization, +deserialization, and bucket management for code deployment. + +Classes: + GCPFunction: Represents a deployed Google Cloud Function with GCP-specific features + +Example: + Creating a GCP function instance: + + config = FunctionConfig(memory=256, timeout=60, runtime="python39") + function = GCPFunction("my-function", "benchmark-name", "hash123", config) +""" + +from typing import cast, Dict, Optional from sebs.faas.config import Resources from sebs.faas.function import Function, FunctionConfig @@ -6,6 +22,15 @@ class GCPFunction(Function): + """Represents a Google Cloud Function with GCP-specific functionality. + + Extends the base Function class with GCP-specific features like bucket + management for code storage and GCP-specific serialization/deserialization. + + Attributes: + bucket: Cloud Storage bucket name containing the function's code + """ + def __init__( self, name: str, @@ -13,22 +38,56 @@ def __init__( code_package_hash: str, cfg: FunctionConfig, bucket: Optional[str] = None, - ): + ) -> None: + """Initialize a GCP Cloud Function instance. + + Args: + name: Function name on GCP + benchmark: Name of the benchmark this function implements + code_package_hash: Hash of the code package for version tracking + cfg: Function configuration (memory, timeout, etc.) + bucket: Optional Cloud Storage bucket name for code storage + """ super().__init__(benchmark, name, code_package_hash, cfg) self.bucket = bucket @staticmethod def typename() -> str: + """Get the type name for this function implementation. + + Returns: + Type name string for GCP functions + """ return "GCP.GCPFunction" - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize function to dictionary for cache storage. + Adds code bucket in cloud storage. + + Returns: + Dictionary containing function state including bucket information + """ return { **super().serialize(), "bucket": self.bucket, } @staticmethod - def deserialize(cached_config: dict) -> "GCPFunction": + def deserialize(cached_config: Dict) -> "GCPFunction": + """Deserialize function from cached configuration. + + Reconstructs a GCPFunction instance from cached data including + triggers and configuration. Handles both Library and HTTP triggers. + + Args: + cached_config: Dictionary containing cached function configuration + + Returns: + Reconstructed GCPFunction instance with triggers + + Raises: + AssertionError: If an unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger @@ -49,7 +108,19 @@ def deserialize(cached_config: dict) -> "GCPFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: GCPStorage): + def code_bucket(self, benchmark: str, storage_client: GCPStorage) -> str: + """Get or create the Cloud Storage bucket for function code. 
+
+        Returns the bucket name where the function's code is stored,
+        creating a deployment bucket if none is assigned.
+
+        Args:
+            benchmark: Benchmark name (unused but kept for compatibility)
+            storage_client: GCP storage client for bucket operations
+
+        Returns:
+            Cloud Storage bucket name containing function code
+        """
         if not self.bucket:
             self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT)
         return self.bucket
diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py
index 187d8cda..9ed9b197 100644
--- a/sebs/gcp/gcp.py
+++ b/sebs/gcp/gcp.py
@@ -1,3 +1,29 @@
+"""Google Cloud Platform (GCP) serverless system implementation.
+
+This module provides the main GCP implementation with function deployment, management,
+monitoring, and resource allocation. It integrates with Google Cloud Functions,
+Cloud Storage, Cloud Monitoring, and Cloud Logging.
+
+The module handles:
+- Function creation, updating, and lifecycle management
+- Code packaging and deployment to Cloud Functions
+- HTTP and library trigger management
+- Performance metrics collection via Cloud Monitoring
+- Execution logs retrieval via Cloud Logging
+- Cold start enforcement for benchmarking
+- Storage bucket management for code deployment
+
+Classes:
+    GCP: Main system class implementing the FaaS System interface
+
+Example:
+    Basic GCP system initialization:
+
+        config = GCPConfig(credentials, resources)
+        gcp_system = GCP(system_config, config, cache, docker_client, logging_handlers)
+        gcp_system.initialize()
+"""
+
 import docker
 import os
 import logging
@@ -25,16 +51,21 @@
 from sebs.gcp.function import GCPFunction
 from sebs.utils import LoggingHandlers
 
-"""
-    This class provides basic abstractions for the FaaS system.
-    It provides the interface for initialization of the system and storage
-    services, creation and update of serverless functions and querying
-    logging and measurements services to obtain error messages and performance
-    measurements.
-"""
-
 
 class GCP(System):
+    """Google Cloud Platform serverless system implementation.
+
+    Provides complete integration with Google Cloud Functions including deployment,
+    monitoring, logging, and resource management. Handles code packaging, function
+    lifecycle management, trigger creation, and performance metrics collection.
+
+    Attributes:
+        _config: GCP-specific configuration including credentials and region
+        function_client: Google Cloud Functions API client
+        cold_start_counter: Counter for enforcing cold starts in benchmarking
+        logging_handlers: Logging configuration for status reporting
+    """
+
     def __init__(
         self,
         system_config: SeBSConfig,
@@ -42,7 +73,16 @@ def __init__(
         cache_client: Cache,
         docker_client: docker.client,
         logging_handlers: LoggingHandlers,
-    ):
+    ) -> None:
+        """Initialize GCP serverless system.
+
+        Args:
+            system_config: General SeBS system configuration
+            config: GCP-specific configuration with credentials and settings
+            cache_client: Cache instance for storing function and resource state
+            docker_client: Docker client for container operations (if needed)
+            logging_handlers: Logging configuration for status reporting
+        """
         super().__init__(
             system_config,
             cache_client,
@@ -56,38 +96,83 @@ def __init__(
 
     @property
     def config(self) -> GCPConfig:
+        """Get the GCP configuration instance.
+
+        Returns:
+            GCP configuration with credentials and region settings
+        """
         return self._config
 
     @staticmethod
-    def name():
+    def name() -> str:
+        """Get the platform name identifier.
+ + Returns: + Platform name string 'gcp' + """ return "gcp" @staticmethod - def typename(): + def typename() -> str: + """Get the platform type name for display. + + Returns: + Platform type string 'GCP' + """ return "GCP" @staticmethod def function_type() -> "Type[Function]": + """Get the function class type for this platform. + + Returns: + GCPFunction class type + """ return GCPFunction - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. + def initialize( + self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None + ) -> None: + """Initialize the GCP system for function deployment and management. - :param config: systems-specific parameters - """ + Sets up the Cloud Functions API client and initializes system resources + including storage buckets and other required infrastructure. + After this call, the GCP system should be ready to allocate functions, + manage storage, and invoke functions. - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + Args: + config: Additional system-specific configuration parameters + resource_prefix: Optional prefix for resource naming to avoid conflicts + """ self.function_client = build("cloudfunctions", "v1", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) def get_function_client(self): + """Get the Google Cloud Functions API client. + + The client is initialized during the `initialize` call. + + Returns: + Initialized Cloud Functions API client + """ return self.function_client def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate a default function name for the given benchmark. + + Creates a standardized function name using resource ID, benchmark name, + language, and version information. Formats the name according to GCP + Cloud Functions naming requirements. + + Args: + code_package: Benchmark package containing metadata + resources: Optional resource configuration for ID generation + + Returns: + Formatted function name suitable for GCP Cloud Functions + """ # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id func_name = "sebs-{}-{}-{}-{}".format( @@ -100,27 +185,24 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name according to GCP Cloud Functions requirements. + + Converts function names to comply with GCP naming rules by replacing + hyphens and dots with underscores. GCP functions must begin with a letter + and can only contain letters, numbers, and underscores. + + Args: + func_name: Raw function name to format + + Returns: + GCP-compliant function name + """ # GCP functions must begin with a letter # however, we now add by default `sebs` in the beginning func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - Apply the system-specific code packaging routine to build benchmark. - The benchmark creates a code directory with the following structure: - - [benchmark sources] - - [benchmark resources] - - [dependence specification], e.g. requirements.txt or package.json - - [handlers implementation for the language and deployment] - - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific - directory structure for Azure. 
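The naming rules described above are easy to check: default_function_name builds a `sebs-...` identifier, and format_function_name normalizes it for Cloud Functions by replacing hyphens and dots with underscores.

    name = GCP.format_function_name("sebs-1000-110.dynamic-html-python-3.8")
    assert name == "sebs_1000_110_dynamic_html_python_3_8"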
- - :return: path to packaged code and its size - """ - def package_code( self, directory: str, @@ -131,6 +213,33 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package benchmark code for GCP Cloud Functions deployment. + + Transforms the benchmark code directory structure to meet GCP Cloud Functions + requirements. Creates a zip archive with the appropriate handler file naming + and directory structure for the specified language runtime. + + The packaging process: + 1. Creates a 'function' subdirectory for benchmark sources + 2. Renames handler files to GCP-required names (handler.py -> main.py) + 3. Creates a zip archive for deployment + 4. Restores original file structure + + Args: + directory: Path to the benchmark code directory + language_name: Programming language (python, nodejs) + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (x86_64, arm64) + benchmark: Benchmark name for archive naming + is_cached: Whether this package is from cache + container_deployment: Whether to use container deployment (unsupported) + + Returns: + Tuple of (archive_path, archive_size_bytes, container_uri) + + Raises: + NotImplementedError: If container_deployment is True + """ container_uri = "" @@ -165,8 +274,8 @@ def package_code( Note that the function GCP.recursive_zip is slower than the use of e.g. `utils.execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True)` or `shutil.make_archive(benchmark_archive, direcory, directory)` - But both of the two alternatives need a chance of directory - (shutil.make_archive does the directorychange internaly) + But both of the two alternatives need a change of directory + (shutil.make_archive does the directory change internaly) which leads to a "race condition" when running several benchmarks in parallel, since a change of the current directory is NOT Thread specfic. """ @@ -190,6 +299,26 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "GCPFunction": + """Create a new GCP Cloud Function or update existing one. + + Deploys a benchmark as a Cloud Function, handling code upload to Cloud Storage, + function creation with proper configuration, and IAM policy setup for + unauthenticated invocations (HTTP triggers). + If the function already exists, updates it instead. + + Args: + code_package: Benchmark package with code and configuration + func_name: Name for the Cloud Function + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container image URI (unused for GCP) + + Returns: + GCPFunction instance representing the deployed function + + Raises: + NotImplementedError: If container_deployment is True + RuntimeError: If function creation or IAM configuration fails + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") @@ -311,6 +440,23 @@ def create_function( return function def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for the given function. + + Creates HTTP triggers for Cloud Functions, waiting for function deployment + to complete before extracting the trigger URL. + Only HTTP triggers are supported here; Library triggers are added by + default during function creation. 
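A hedged sketch of invoking the packaging step described above; the directory path is hypothetical and `gcp_system` is assumed to be an initialized GCP instance:

    archive_path, archive_size, container_uri = gcp_system.package_code(
        directory="/tmp/benchmarks/110.dynamic-html/python",   # hypothetical path
        language_name="python",
        language_version="3.8",
        architecture="x86_64",
        benchmark="110.dynamic-html",
        is_cached=False,
        container_deployment=False,   # True raises NotImplementedError on GCP
    )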
+ + Args: + function: Function instance to create trigger for + trigger_type: Type of trigger to create (only HTTP supported) + + Returns: + Created trigger instance with URL and configuration + + Raises: + RuntimeError: If trigger type is not supported + """ from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: @@ -341,7 +487,15 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) self.cache_client.update_function(function) return trigger - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Configure a cached function instance for use. + + Sets up library triggers for functions loaded from cache, ensuring + they have the proper deployment client and logging configuration. + + Args: + function: Cached function instance to configure + """ from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger @@ -357,7 +511,23 @@ def update_function( code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update an existing Cloud Function with new code and configuration. + + Uploads new code package to Cloud Storage and patches the existing function + with updated runtime, memory, timeout, and environment variables. Waits + for deployment to complete before returning. + + Args: + function: Existing function instance to update + code_package: New benchmark package with updated code + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container image URI (unused) + + Raises: + NotImplementedError: If container_deployment is True + RuntimeError: If function update fails after maximum retries + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") @@ -418,7 +588,19 @@ def update_function( ) self.logging.info("Published new function code and configuration.") - def _update_envs(self, full_function_name: str, envs: dict) -> dict: + def _update_envs(self, full_function_name: str, envs: Dict) -> Dict: + """Merge new environment variables with existing function environment. + + Retrieves current function environment variables and merges them with + new variables, with new variables taking precedence on conflicts. + + Args: + full_function_name: Fully qualified function name + envs: New environment variables to add/update + + Returns: + Merged environment variables dictionary + """ get_req = ( self.function_client.projects().locations().functions().get(name=full_function_name) @@ -432,7 +614,18 @@ def _update_envs(self, full_function_name: str, envs: dict) -> dict: return envs - def _generate_function_envs(self, code_package: Benchmark) -> dict: + def _generate_function_envs(self, code_package: Benchmark) -> Dict: + """Generate environment variables for function based on benchmark requirements. + + Creates environment variables needed by the benchmark, such as NoSQL + database connection information. 
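The merge semantics of _update_envs described above amount to a dictionary union in which the new values win; the variable names below are illustrative:

    existing = {"NOSQL_STORAGE_DATABASE": "sebs-db", "KEEP_ME": "1"}
    new = {"NOSQL_STORAGE_DATABASE": "sebs-db-2", "COLD_START_COUNTER": "7"}

    merged = {**existing, **new}   # new variables take precedence on conflicts
    assert merged["NOSQL_STORAGE_DATABASE"] == "sebs-db-2"
    assert merged["KEEP_ME"] == "1"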
+ + Args: + code_package: Benchmark package with module requirements + + Returns: + Dictionary of environment variables for the function + """ envs = {} if code_package.uses_nosql: @@ -447,8 +640,24 @@ def _generate_function_envs(self, code_package: Benchmark) -> dict: return envs def update_function_configuration( - self, function: Function, code_package: Benchmark, env_variables: dict = {} - ): + self, function: Function, code_package: Benchmark, env_variables: Dict = {} + ) -> int: + """Update function configuration including memory, timeout, and environment. + + Updates the Cloud Function's memory allocation, timeout, and environment + variables without changing the code. Waits for deployment to complete. + + Args: + function: Function instance to update + code_package: Benchmark package with configuration requirements + env_variables: Additional environment variables to set + + Returns: + Version ID of the updated function + + Raises: + RuntimeError: If configuration update fails after maximum retries + """ assert code_package.has_input_processed @@ -520,22 +729,43 @@ def update_function_configuration( return versionId @staticmethod - def get_full_function_name(project_name: str, location: str, func_name: str): - return f"projects/{project_name}/locations/{location}/functions/{func_name}" + def get_full_function_name(project_name: str, location: str, func_name: str) -> str: + """Generate the fully qualified function name for GCP API calls. - def prepare_experiment(self, benchmark): - logs_bucket = self._system_resources.get_storage().add_output_bucket( - benchmark, suffix="logs" - ) - return logs_bucket + Args: + project_name: GCP project ID + location: GCP region/location + func_name: Function name + + Returns: + Fully qualified function name in GCP format + """ + return f"projects/{project_name}/locations/{location}/functions/{func_name}" def shutdown(self) -> None: + """Shutdown the GCP system and clean up resources. + + Performs cleanup of system resources and calls parent shutdown method. + """ cast(GCPSystemResources, self._system_resources).shutdown() super().shutdown() def download_metrics( - self, function_name: str, start_time: int, end_time: int, requests: dict, metrics: dict - ): + self, function_name: str, start_time: int, end_time: int, requests: Dict, metrics: Dict + ) -> None: + """Download execution metrics and logs from GCP monitoring services. + + Retrieves function execution times from Cloud Logging and performance + metrics from Cloud Monitoring. Processes logs to extract execution times + and collects metrics like memory usage and network egress. + + Args: + function_name: Name of the function to collect metrics for + start_time: Start timestamp for metric collection (Unix timestamp) + end_time: End timestamp for metric collection (Unix timestamp) + requests: Dictionary of requests keyed by execution ID + metrics: Dictionary to populate with collected metrics + """ from google.api_core import exceptions from time import sleep @@ -651,7 +881,19 @@ def wrapper(gen): } ] - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> int: + """Force a cold start by updating function configuration. + + Triggers a cold start by updating the function's environment variables + with a unique counter value, forcing GCP to create a new instance. 
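The fully qualified name format follows directly from the return statement above; project and region are illustrative:

    full_name = GCP.get_full_function_name(
        "my-project", "europe-west1", "sebs_1000_110_dynamic_html_python_3_8"
    )
    # -> "projects/my-project/locations/europe-west1/functions/sebs_1000_110_dynamic_html_python_3_8"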
+ + Args: + function: Function instance to enforce cold start on + code_package: Benchmark package for configuration + + Returns: + Version ID of the updated function + """ self.cold_start_counter += 1 new_version = self.update_function_configuration( @@ -660,7 +902,16 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark): return new_version - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold starts for multiple functions simultaneously. + + Updates all provided functions to force cold starts and waits for + all deployments to complete before returning. + + Args: + functions: List of functions to enforce cold starts on + code_package: Benchmark package for configuration + """ new_versions = [] for func in functions: @@ -687,6 +938,18 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.cold_start_counter += 1 def get_functions(self, code_package: Benchmark, function_names: List[str]) -> List["Function"]: + """Retrieve multiple function instances and ensure they are deployed. + + Gets function instances for the provided names and waits for all + functions to be in ACTIVE deployment state. + + Args: + code_package: Benchmark package for function creation + function_names: List of function names to retrieve + + Returns: + List of deployed function instances + """ functions: List["Function"] = [] undeployed_functions_before = [] @@ -716,6 +979,15 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: + + """Check if a function is deployed and optionally verify its version. + Args: + func_name: Name of the function to check + versionId: Optional specific version ID to verify (-1 to check any) + + Returns: + Tuple of (is_deployed, current_version_id) + """ name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) @@ -726,31 +998,32 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: return (status_res["versionId"] == versionId, status_res["versionId"]) def deployment_version(self, func: Function) -> int: + """Get the current deployment version ID of a function. + + Args: + func: Function instance to check + + Returns: + Current version ID of the function + """ name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() return int(status_res["versionId"]) - # @abstractmethod - # def get_invocation_error(self, function_name: str, - # start_time: int, end_time: int): - # pass - - # @abstractmethod - # def download_metrics(self): - # pass - - """ - Helper method for recursive_zip + @staticmethod + def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile) -> None: + """Recursively add files and directories to a zip archive. - :param base_directory: path to directory to be zipped - :param path: path to file of subdirectory to be zipped - :param archive: ZipFile object - """ + Helper method for recursive_zip that handles directory traversal + and adds files with relative paths to the archive. 
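A sketch of how the cold-start and deployment-status pieces fit together, assuming `gcp_system`, `function`, and `benchmark` are already-initialized objects; enforce_cold_start performs an equivalent wait internally for whole lists of functions:

    from time import sleep

    new_version = gcp_system._enforce_cold_start(function, benchmark)

    deployed, _ = gcp_system.is_deployed(function.name, versionId=new_version)
    while not deployed:
        sleep(5)
        deployed, _ = gcp_system.is_deployed(function.name, versionId=new_version)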
- @staticmethod - def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): + Args: + base_directory: Base directory path for relative path calculation + path: Current path being processed (file or directory) + archive: ZipFile object to add files to + """ paths = os.listdir(path) for p in paths: directory = os.path.join(path, p) @@ -760,19 +1033,20 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): if directory != archive.filename: # prevent form including itself archive.write(directory, os.path.relpath(directory, base_directory)) - """ - https://gist.github.com/felixSchl/d38b455df8bf83a78d3d + @staticmethod + def recursive_zip(directory: str, archname: str) -> bool: + """Create a zip archive of a directory with relative paths. - Zip directory with relative paths given an absolute path - If the archive exists only new files are added and updated. - If the archive does not exist a new one is created. + Creates a compressed zip archive of the specified directory, preserving + the relative directory structure. Uses maximum compression level. - :param path: absolute path to the directory to be zipped - :param archname: path to the zip file - """ + Args: + directory: Absolute path to the directory to be zipped + archname: Path where the zip file should be created - @staticmethod - def recursive_zip(directory: str, archname: str): + Returns: + True if archiving was successful + """ archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) if os.path.isdir(directory): GCP.helper_zip(directory, directory, archive) diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index 0a7d5c14..a54b9e5a 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -1,3 +1,20 @@ +"""System resource management for Google Cloud Platform. + +This module provides the GCPSystemResources class that manages all GCP resources +required for serverless benchmarking, including storage, NoSQL databases, and +CLI tools. + +Classes: + GCPSystemResources: Main resource manager for GCP services + +Example: + Creating and using GCP system resources: + + resources = GCPSystemResources(system_config, gcp_config, cache, docker_client, handlers) + storage = resources.get_storage(replace_existing=False) + datastore = resources.get_nosql_storage() +""" + from typing import cast, Optional from sebs.config import SeBSConfig @@ -13,12 +30,34 @@ class GCPSystemResources(SystemResources): + """System resource manager for Google Cloud Platform services. + + Handles resource initialization, configuration, and cleanup. + + Attributes: + _storage: Cloud Storage instance for object storage + _nosql_storage: Datastore instance for NoSQL operations + _cli_instance: gcloud CLI interface for administrative operations + _system_config: SeBS system configuration + _logging_handlers: Logging configuration + """ + @staticmethod def typename() -> str: + """Get the type name for this resource manager. + + Returns: + Type name string for GCP system resources + """ return "GCP.SystemResources" @property def config(self) -> GCPConfig: + """Get the GCP configuration instance. + + Returns: + GCP configuration with credentials and settings + """ return cast(GCPConfig, self._config) def __init__( @@ -28,7 +67,16 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """Initialize GCP system resources manager. 
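Usage of the archiving helper shown above is a single call; paths are illustrative. The archive stores paths relative to the source directory and never calls os.chdir(), which keeps parallel benchmark builds thread-safe:

    GCP.recursive_zip("/tmp/function-code", "/tmp/110.dynamic-html.zip")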
+ + Args: + system_config: SeBS system configuration + config: GCP-specific configuration + cache_client: Cache instance for resource state + docker_client: Docker client for containerized operations + logger_handlers: Logging configuration + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers @@ -37,15 +85,19 @@ def __init__( self._cli_instance: Optional[GCloudCLI] = None self._system_config = system_config - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. + def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: + """Get or create the Cloud Storage instance. - :param replace_existing: replace benchmark input data if exists already - """ + Provides access to Google Cloud Storage for persistent object storage. + Creates the storage instance if it doesn't exist, or updates the + replace_existing setting if provided. - def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: + Args: + replace_existing: Whether to replace existing benchmark input data + + Returns: + Initialized GCP storage instance + """ if not self._storage: self._storage = GCPStorage( self.config.region, @@ -59,6 +111,14 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: return self._storage def get_nosql_storage(self) -> Datastore: + """Get or create the Datastore instance for NoSQL operations. + + Provides access to Google Cloud Firestore/Datastore for NoSQL + database operations required by benchmarks. + + Returns: + Initialized Datastore instance + """ if not self._nosql_storage: self._nosql_storage = Datastore( self.cli_instance, self._cache_client, self.config.resources, self.config.region @@ -67,6 +127,14 @@ def get_nosql_storage(self) -> Datastore: @property def cli_instance(self) -> GCloudCLI: + """Get or create the gcloud CLI instance. + + Provides access to a containerized gcloud CLI for administrative + operations. Initializes and authenticates the CLI if needed. + + Returns: + Authenticated gcloud CLI instance + """ if self._cli_instance is None: self._cli_instance = GCloudCLI( self.config.credentials, self._system_config, self._docker_client @@ -76,10 +144,21 @@ def cli_instance(self) -> GCloudCLI: self._cli_instance.login(self.config.credentials.project_name) return self._cli_instance - def initialize_cli(self, cli: GCloudCLI): + def initialize_cli(self, cli: GCloudCLI) -> None: + """Initialize with an existing CLI instance. + + Uses a pre-configured CLI instance instead of creating a new one. + + Args: + cli: Pre-configured gcloud CLI instance + """ self._cli_instance = cli self._cli_instance_stop = False def shutdown(self) -> None: + """Shutdown system resources and clean up. + + Stops the gcloud CLI container if it was created by this instance. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index c578966f..aed3120a 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -1,3 +1,20 @@ +"""Google Cloud Storage implementation for SeBS. + +This module provides the GCPStorage class that implements object storage operations +using Google Cloud Storage. It handles bucket management, file uploads/downloads, +and storage resource allocation for benchmarks and deployment artifacts. 
+ +Classes: + GCPStorage: Google Cloud Storage implementation with bucket and blob management + +Example: + Using GCP storage for benchmark files: + + storage = GCPStorage(region, cache, resources, replace_existing=False) + bucket = storage.add_benchmark_bucket("my-benchmark") + storage.upload(bucket, "/path/to/file.zip", "benchmark-code.zip") +""" + import logging import os import uuid @@ -12,34 +29,93 @@ class GCPStorage(PersistentStorage): + """Google Cloud Storage implementation providing persistent storage. + + Handles bucket creation, file operations, and storage resource management + for benchmarks, deployment artifacts, and experiment outputs. + + Attributes: + client: Google Cloud Storage client instance + cached: Whether storage operations use cached data + """ + @staticmethod def typename() -> str: + """Get the type name for this storage implementation. + + Returns: + Type name string for GCP storage + """ return "GCP.GCPStorage" @staticmethod - def deployment_name(): + def deployment_name() -> str: + """Get the deployment name for this storage implementation. + + Returns: + Deployment name string 'gcp' + """ return "gcp" @property def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files in buckets.""" return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set the flag for replacing existing files.""" self._replace_existing = val def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool - ): + ) -> None: + """Initialize GCP Storage client. + + Args: + region: GCP region for storage resources + cache_client: Cache instance for storing storage state + resources: Resource configuration + replace_existing: Whether to replace existing files during uploads + """ super().__init__(region, cache_client, resources, replace_existing) self.replace_existing = replace_existing self.client = gcp_storage.Client() self.cached = False def correct_name(self, name: str) -> str: + """Correct bucket name to meet GCP naming requirements. + Currently it does nothing - no special requirements on GCP. + + Args: + name: Original bucket name + + Returns: + Corrected bucket name (no changes needed for GCP) + """ return name - def _create_bucket(self, name, buckets: List[str] = [], randomize_name: bool = False): + def _create_bucket( + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False + ) -> str: + """Create a new Cloud Storage bucket or return existing one. + + Checks if a bucket with a similar name (if `name` is a prefix) already exists + in the provided `buckets` list. If `randomize_name` is True, appends a + random string to make the name unique. + + Args: + name: Base name for the bucket + buckets: List of existing bucket names to check + randomize_name: Whether to append random suffix to avoid name conflicts + + Returns: + Name of the created or existing bucket + """ + + if buckets is None: + buckets = [] + found_bucket = False for bucket_name in buckets: if name in bucket_name: @@ -62,12 +138,26 @@ def _create_bucket(self, name, buckets: List[str] = [], randomize_name: bool = F return bucket_name def download(self, bucket_name: str, key: str, filepath: str) -> None: + """Download a file from Cloud Storage. 
+ + Args: + bucket_name: Name of the storage bucket + key: Object key/path in the bucket + filepath: Local file path to save the downloaded file + """ logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key) blob.download_to_filename(filepath) - def upload(self, bucket_name: str, filepath: str, key: str): + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """Upload a file to Cloud Storage. + + Args: + bucket_name: Name of the storage bucket + filepath: Local file path to upload + key: Object key/path in the bucket for the uploaded file + """ logging.info("Upload {} to {}".format(filepath, bucket_name)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) @@ -75,6 +165,18 @@ def upload(self, bucket_name: str, filepath: str, key: str): blob.upload_from_filename(filepath) def exists_bucket(self, bucket_name: str) -> bool: + """Check if a Cloud Storage bucket exists. + + Handles `exceptions.Forbidden` which can occur if the bucket exists + but is not accessible by the current credentials (treated as not existing + for SeBS purposes). + + Args: + bucket_name: Name of the bucket to check + + Returns: + True if bucket exists and is accessible, False otherwise + """ try: return self.client.bucket(bucket_name).exists() # 403 returned when the bucket exists but is owned by another user @@ -82,12 +184,29 @@ def exists_bucket(self, bucket_name: str) -> bool: return False def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """List objects in a Cloud Storage bucket with optional prefix filter. + + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List of object names in the bucket matching the prefix + """ bucket_instance = self.client.get_bucket(bucket_name) all_blobs = list(self.client.list_blobs(bucket_instance)) blobs = [blob.name for blob in all_blobs if prefix in blob.name] return blobs def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """List Cloud Storage buckets, optionally filtered by name. + + Args: + bucket_name: Optional bucket name filter + + Returns: + List of bucket names, filtered if bucket_name is provided + """ all_buckets = list(self.client.list_buckets()) if bucket_name is not None: buckets = [bucket.name for bucket in all_buckets if bucket_name in bucket.name] @@ -95,13 +214,38 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: buckets = [bucket.name for bucket in all_buckets] return buckets - def remove_bucket(self, bucket_name: str): + def remove_bucket(self, bucket_name: str) -> None: + """Remove a Cloud Storage bucket. + + Args: + bucket_name: Name of the bucket to remove + """ self.client.get_bucket(bucket_name).delete() - def clean_bucket(self, bucket: str): + def clean_bucket(self, bucket: str) -> None: + """Clean all objects from a Cloud Storage bucket. + + Args: + bucket: Name of the bucket to clean + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: + """Upload function for batch operations with caching support. + + Uploads a file to the appropriate benchmark bucket, respecting cache + settings and replace_existing configuration. + + This is primarily used by benchmarks to upload input data. 
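A minimal sketch of the object operations documented above, assuming `storage` is the GCPStorage instance returned by GCPSystemResources.get_storage() and `bucket` is an existing bucket name:

    storage.upload(bucket, "/tmp/110.dynamic-html.zip", "code/110.dynamic-html.zip")
    storage.download(bucket, "code/110.dynamic-html.zip", "/tmp/copy-of-code.zip")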
+ + Args: + path_idx: Index of the input path prefix + key: Object key for the uploaded file + filepath: Local file path to upload + """ if self.cached and not self.replace_existing: return diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..744cece7 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -1,3 +1,24 @@ +"""Trigger implementations for Google Cloud Functions. + +This module provides trigger classes for invoking Cloud Functions through different +mechanisms including direct library calls and HTTP requests. Supports both +synchronous and asynchronous invocation patterns. + +Classes: + LibraryTrigger: Direct Cloud Functions API invocation trigger + HTTPTrigger: HTTP endpoint invocation trigger + +Example: + Using a library trigger for direct invocation: + + trigger = LibraryTrigger("my-function", gcp_client) + result = trigger.sync_invoke({"input": "data"}) + Using an HTTP trigger: + + trigger = HTTPTrigger("https://region-project.cloudfunctions.net/my-function") + result = trigger.sync_invoke({"input": "data"}) +""" + import concurrent.futures import datetime import json @@ -9,29 +30,80 @@ class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + """Direct Cloud Functions API trigger for synchronous invocation. + + Uses the Google Cloud Functions API to invoke functions directly through + the cloud functions client. Provides precise execution timing and error + handling. Waits for function deployment before invocation. + + Attributes: + name: Function name to invoke + _deployment_client: GCP client for API operations + """ + + def __init__(self, fname: str, deployment_client: Optional[GCP] = None) -> None: + """Initialize library trigger for direct function invocation. + + Args: + fname: Name of the Cloud Function to invoke + deployment_client: Optional GCP client for API operations + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Get the type name for this trigger implementation. + + Returns: + Type name string for library triggers + """ return "GCP.LibraryTrigger" @property def deployment_client(self) -> GCP: + """Get the GCP deployment client. + + Returns: + GCP client instance for API operations + + Raises: + AssertionError: If deployment client is not set + """ assert self._deployment_client return self._deployment_client @deployment_client.setter - def deployment_client(self, deployment_client: GCP): + def deployment_client(self, deployment_client: GCP) -> None: + """Set the GCP deployment client. + + Args: + deployment_client: GCP client instance + """ self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type for this implementation. + + Returns: + Library trigger type enum value + """ return Trigger.TriggerType.LIBRARY - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict) -> ExecutionResult: + """Synchronously invoke the Cloud Function using the API. + + Waits for function deployment, then invokes via Cloud Functions API. + Measures execution time and handles errors. 
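The listing helpers compose naturally with the existence check; `storage` and `bucket` are again assumed to be an initialized GCPStorage instance and an existing bucket name:

    if storage.exists_bucket(bucket):
        for key in storage.list_bucket(bucket, prefix="code/"):
            print(key)

    print(storage.list_buckets(bucket_name="sebs-"))   # filter bucket names by substring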
+ + Args: + payload: Input data to send to the function + + Returns: + ExecutionResult with timing, output, and error information + """ self.logging.info(f"Invoke function {self.name}") @@ -71,43 +143,121 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: gcp_result.parse_benchmark_output(output) return gcp_result - def async_invoke(self, payload: dict): + def async_invoke(self, payload: Dict): + """Asynchronously invoke the Cloud Function. + + Note: This method is not currently implemented for GCP's LibraryTrigger. + GCP's `functions.call` API is synchronous. Asynchronous behavior could + need to be implemented using a thread pool or similar mechanism if desired. + + Args: + payload: Input data to send to the function + + Raises: + NotImplementedError: Async invocation not implemented for library triggers + """ raise NotImplementedError() - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize trigger to dictionary for cache storage. + + Returns: + Dictionary containing trigger type and name + """ return {"type": "Library", "name": self.name} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict) -> Trigger: + """Deserialize trigger from cached configuration. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + Reconstructed LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): - def __init__(self, url: str): + """HTTP endpoint trigger for Cloud Functions invocation. + + Invokes Cloud Functions through their HTTP endpoints, supporting both + synchronous and asynchronous execution patterns using HTTP requests. + + Attributes: + url: HTTP endpoint URL for the Cloud Function + """ + + def __init__(self, url: str) -> None: + """Initialize HTTP trigger with function endpoint URL. + + Args: + url: HTTP endpoint URL for the Cloud Function + """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Get the type name for this trigger implementation. + + Returns: + Type name string for HTTP triggers + """ return "GCP.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type for this implementation. + + Returns: + HTTP trigger type enum value + """ return Trigger.TriggerType.HTTP - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict) -> ExecutionResult: + """Synchronously invoke the Cloud Function via HTTP. + + Args: + payload: Input data to send to the function + + Returns: + ExecutionResult from the HTTP invocation + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict) -> concurrent.futures.Future: + """Asynchronously invoke the Cloud Function via HTTP. + + Args: + payload: Input data to send to the function + + Returns: + Future object for the async HTTP invocation + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict: + """Serialize trigger to dictionary for cache storage. + + Returns: + Dictionary containing trigger type and URL + """ return {"type": "HTTP", "url": self.url} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict) -> Trigger: + """Deserialize trigger from cached configuration. 
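A sketch of fan-out invocations through the HTTP trigger described above; the endpoint URL and payload are illustrative:

    trigger = HTTPTrigger("https://europe-west1-my-project.cloudfunctions.net/sebs_110_dynamic_html")

    futures = [trigger.async_invoke({"username": "alice", "size": 100}) for _ in range(10)]
    results = [f.result() for f in futures]   # each result is an ExecutionResult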
+ + Args: + obj: Dictionary containing serialized trigger data + + Returns: + Reconstructed HTTPTrigger instance + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/local/__init__.py b/sebs/local/__init__.py index caded0a6..5bf08d0d 100644 --- a/sebs/local/__init__.py +++ b/sebs/local/__init__.py @@ -1,3 +1,18 @@ +"""SeBS local execution platform module. + +This module provides the local execution platform by running serverless functions +locally using Docker containers, providing a development and testing environment +that mimics serverless execution without requiring cloud platform deployment. + +Key components: +- Local: Main system class for local function execution +- LocalFunction: Represents a function deployed locally in a Docker container +- Deployment: Manages deployments and memory measurements for local functions + +The local platform supports HTTP triggers and provides memory profiling capabilities +for performance analysis. It can also be integrated with local object and NoSQL storage. +""" + from .local import Local # noqa from .function import LocalFunction # noqa from .deployment import Deployment # noqa diff --git a/sebs/local/config.py b/sebs/local/config.py index 0b512c67..4cfa9795 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -1,3 +1,16 @@ +"""Configuration classes for the local execution platform. + +This module provides configuration classes for the SeBS local execution platform, +including credentials, resources, and overall configuration management. The local +platform requires minimal configuration since it runs functions locally using +Docker containers. + +Classes: + LocalCredentials: Empty credentials class for local execution + LocalResources: Resource management for local deployments + LocalConfig: Main configuration class for local platform +""" + from typing import cast, Optional, Set from sebs.cache import Cache @@ -8,49 +21,104 @@ class LocalCredentials(Credentials): + """Credentials class for local execution platform. + + The local platform doesn't require any authentication credentials since + functions run locally using Docker containers. This class provides the + required interface with empty implementations. + """ + def serialize(self) -> dict: + """Serialize credentials to dictionary. + + Returns: + dict: Empty dictionary as no credentials are required for local execution + """ return {} @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: - return LocalCredentials() + """Deserialize credentials from configuration. + Args: + config: Configuration dictionary (unused for local) + cache: Cache client (unused for local) + handlers: Logging handlers (unused for local) -""" - No need to cache and store - we prepare the benchmark and finish. - The rest is used later by the user. -""" + Returns: + LocalCredentials: New instance of local credentials + """ + return LocalCredentials() class LocalResources(SelfHostedResources): + """Resource management for local execution platform. + + Manages resources for local function execution, including port allocation + for Docker containers and storage configurations. Tracks allocated ports + to avoid conflicts when running multiple functions. + + In local deployments, caching and storing resource details is minimal. 
+ + Attributes: + _path: Path for local resource storage + _allocated_ports: Set of ports currently allocated to containers + """ + def __init__( self, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): + """Initialize local resources. + + Args: + storage_cfg: Optional persistent storage configuration + nosql_storage_cfg: Optional NoSQL storage configuration + """ self._path: str = "" super().__init__("local", storage_cfg, nosql_storage_cfg) self._allocated_ports: Set[int] = set() @property def allocated_ports(self) -> set: + """Get the set of allocated ports. + + Returns: + set: Set of port numbers currently allocated to containers + """ return self._allocated_ports def serialize(self) -> dict: + """Serialize resources to dictionary. + + Returns: + dict: Dictionary containing resource configuration including allocated ports + """ out = super().serialize() out["allocated_ports"] = list(self._allocated_ports) return out @staticmethod - def initialize(res: Resources, config: dict): + def initialize(res: Resources, config: dict) -> None: + """Initialize resources from configuration. + Args: + res: Resources instance to initialize + config: Configuration dictionary containing resource settings + """ resources = cast(LocalResources, res) if "allocated_ports" in config: resources._allocated_ports = set(config["allocated_ports"]) - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current resource state. + + Args: + cache: Cache client to update + """ super().update_cache(cache) cache.update_config( val=list(self._allocated_ports), keys=["local", "resources", "allocated_ports"] @@ -58,6 +126,16 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """Deserialize resources from configuration. + + Args: + config: Configuration dictionary + cache: Cache client for loading cached resources + handlers: Logging handlers for resource logging + + Returns: + LocalResources: Initialized local resources instance + """ ret = LocalResources() cached_config = cache.get_config("local") @@ -77,34 +155,82 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class LocalConfig(Config): + """Configuration class for local execution platform. + + No extra configuration - just implementation of the required interfaces. + + Attributes: + _credentials: Local credentials instance (empty) + _resources: Local resources instance for port management + """ + def __init__(self): + """Initialize local configuration.""" super().__init__(name="local") self._credentials = LocalCredentials() self._resources = LocalResources() @staticmethod def typename() -> str: + """Get the type name for this configuration. + + Returns: + str: Type name "Local.Config" + """ return "Local.Config" @staticmethod - def initialize(cfg: Config, dct: dict): + def initialize(cfg: Config, dct: dict) -> None: + """Initialize configuration from dictionary. + + Args: + cfg: Configuration instance to initialize + dct: Dictionary containing configuration data + + Note: + No initialization needed for local platform + """ pass @property def credentials(self) -> LocalCredentials: + """Get the local credentials. + + Returns: + LocalCredentials: The credentials instance + """ return self._credentials @property def resources(self) -> LocalResources: + """Get the local resources. 
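A sketch of the port bookkeeping described above; whether serialize() succeeds without a storage configuration depends on the parent class, so treat this as illustrative only:

    resources = LocalResources()
    resources.allocated_ports.add(9000)        # port claimed by a running container

    state = resources.serialize()              # includes "allocated_ports": [9000]

    restored = LocalResources()
    LocalResources.initialize(restored, state)
    assert 9000 in restored.allocated_ports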
+ + Returns: + LocalResources: The resources instance + """ return self._resources @resources.setter - def resources(self, val: LocalResources): + def resources(self, val: LocalResources) -> None: + """Set the local resources. + + Args: + val: New resources instance + """ self._resources = val @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """Deserialize configuration from dictionary. + + Args: + config: Configuration dictionary + cache: Cache client for loading cached configuration + handlers: Logging handlers for configuration logging + Returns: + LocalConfig: Initialized local configuration instance + """ config_obj = LocalConfig() config_obj.resources = cast( LocalResources, LocalResources.deserialize(config, cache, handlers) @@ -113,8 +239,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config return config_obj def serialize(self) -> dict: + """Serialize configuration to dictionary. + + Returns: + dict: Dictionary containing configuration data + """ out = {"name": "local", "region": self._region, "resources": self._resources.serialize()} return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update cache with current configuration. + + Args: + cache: Cache client to update + """ self.resources.update_cache(cache) diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index 85f7df8e..86365297 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -1,3 +1,13 @@ +"""Deployment management for local execution platform. + +This module provides the Deployment class for managing local function deployments, +including memory measurement collection, function lifecycle management, and +resource cleanup. + +Classes: + Deployment: Main deployment management class for local functions +""" + import json import os from signal import SIGKILL @@ -12,15 +22,36 @@ class Deployment(LoggingBase): + """Manages local function deployments and memory measurements. + + Attributes: + _functions: List of deployed local functions + _storage: Optional Minio storage instance + _inputs: List of function input configurations + _memory_measurement_pids: PIDs of memory measurement processes + _measurement_file: Path to memory measurement output file + """ + @property def measurement_file(self) -> Optional[str]: + """Get the path to the memory measurement file. + + Returns: + Optional[str]: Path to measurement file, or None if not set + """ return self._measurement_file @measurement_file.setter - def measurement_file(self, val: Optional[str]): + def measurement_file(self, val: Optional[str]) -> None: + """Set the path to the memory measurement file. + + Args: + val: Path to measurement file, or None to unset + """ self._measurement_file = val def __init__(self): + """Initialize a new deployment.""" super().__init__() self._functions: List[LocalFunction] = [] self._storage: Optional[Minio] @@ -28,18 +59,42 @@ def __init__(self): self._memory_measurement_pids: List[int] = [] self._measurement_file: Optional[str] = None - def add_function(self, func: LocalFunction): + def add_function(self, func: LocalFunction) -> None: + """Add a function to the deployment. + + If the function has a memory measurement PID, it's also recorded. 
+ + Args: + func: Local function to add to the deployment + """ self._functions.append(func) if func.memory_measurement_pid is not None: self._memory_measurement_pids.append(func.memory_measurement_pid) - def add_input(self, func_input: dict): + def add_input(self, func_input: dict) -> None: + """Add function input configuration to the deployment. + + Args: + func_input: Dictionary containing function input configuration + """ self._inputs.append(func_input) - def set_storage(self, storage: Minio): + def set_storage(self, storage: Minio) -> None: + """Set the storage instance for the deployment. + + Args: + storage: Minio storage instance to use + """ self._storage = storage - def serialize(self, path: str): + def serialize(self, path: str) -> None: + """Serialize deployment configuration to file. + + Includes details about functions, storage, inputs, and memory measurements. + + Args: + path: File path to write serialized deployment configuration + """ with open(path, "w") as out: config: dict = { "functions": self._functions, @@ -55,9 +110,20 @@ def serialize(self, path: str): out.write(serialize(config)) - # FIXME: do we still use it? @staticmethod def deserialize(path: str, cache_client: Cache) -> "Deployment": + """Deserialize deployment configuration from file. + + Args: + path: File path to read serialized deployment configuration + cache_client: Cache client for loading cached resources + + Returns: + Deployment: Deserialized deployment instance + + Note: + This method may be deprecated - check if still in use + """ with open(path, "r") as in_f: input_data = json.load(in_f) deployment = Deployment() @@ -73,8 +139,16 @@ def deserialize(path: str, cache_client: Cache) -> "Deployment": ) return deployment - def shutdown(self, output_json: str): + def shutdown(self, output_json: str) -> None: + """Shutdown the deployment and collect memory measurements. + + Terminates all memory measurement processes, processes measurement data, + and stops all function containers. Memory measurements are aggregated + and written to the specified output file. + Args: + output_json: Path to write memory measurement results + """ if len(self._memory_measurement_pids) > 0: self.logging.info("Killing memory measurement processes") diff --git a/sebs/local/function.py b/sebs/local/function.py index f0104a4e..f141f58a 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -1,3 +1,12 @@ +"""Function and trigger implementations for local execution platform. + +Functions run as Docker containers with HTTP triggers for invocation. + +Classes: + HTTPTrigger: HTTP-based trigger for local function invocation + LocalFunction: Represents a function deployed locally in a Docker container +""" + import concurrent.futures import docker import json @@ -8,36 +17,102 @@ class HTTPTrigger(Trigger): + """HTTP trigger for local function invocation. + + Provides HTTP-based triggering for functions running in local Docker containers. + Supports both synchronous and asynchronous invocation patterns. + + Attributes: + url: HTTP URL endpoint for function invocation + """ + def __init__(self, url: str): + """Initialize HTTP trigger. + + Args: + url: HTTP URL endpoint for the function + """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Get the type name for this trigger. + + Returns: + str: Type name "Local.HTTPTrigger" + """ return "Local.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Get the trigger type. 
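Putting the deployment bookkeeping together, assuming `func` is a running LocalFunction and `minio_storage` an initialized Minio instance; the input dictionary shape is illustrative:

    deployment = Deployment()
    deployment.add_function(func)
    deployment.add_input({"bucket": {"input": "in-bucket", "output": "out-bucket"}})
    deployment.set_storage(minio_storage)
    deployment.serialize("/tmp/deployment.json")

    # After the experiment: stop containers and flush memory measurements.
    deployment.shutdown("/tmp/memory-measurements.json")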
+ + Returns: + Trigger.TriggerType: HTTP trigger type + """ return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """Synchronously invoke the function via HTTP. + + Args: + payload: Function input payload as dictionary + + Returns: + ExecutionResult: Result of the function execution + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """Asynchronously invoke the function via HTTP. + + Args: + payload: Function input payload as dictionary + + Returns: + concurrent.futures.Future: Future object for the execution result + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """Serialize trigger configuration to dictionary. + + Returns: + dict: Dictionary containing trigger type and URL + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """Deserialize trigger from dictionary. + + Args: + obj: Dictionary containing trigger configuration + + Returns: + HTTPTrigger: Deserialized HTTP trigger instance + """ return HTTPTrigger(obj["url"]) class LocalFunction(Function): + """Function implementation for local execution platform. + + Represents a serverless function running locally in a Docker container. + Handles container management and URL resolution. + + Attributes: + _instance: Docker container running the function + _instance_id: Container ID for the function + _port: Port number the function is listening on + _url: Complete URL for function invocation + _measurement_pid: Optional PID of memory measurement process + """ + def __init__( self, docker_container, @@ -48,6 +123,22 @@ def __init__( config: FunctionConfig, measurement_pid: Optional[int] = None, ): + """Initialize local function. + + Determines the invocation URL based on the Docker container's network settings. + + Args: + docker_container: Docker container instance running the function + port: Port number the function is listening on + name: Function name + benchmark: Benchmark name this function implements + code_package_hash: Hash of the function code package + config: Function configuration + measurement_pid: Optional PID of memory measurement process + + Raises: + RuntimeError: If container IP address cannot be determined + """ super().__init__(benchmark, name, code_package_hash, config) self._instance = docker_container self._instance_id = docker_container.id @@ -74,25 +165,55 @@ def __init__( @property def container(self) -> docker.models.containers.Container: + """Get the Docker container running this function. + + Returns: + docker.models.containers.Container: The Docker container instance + """ return self._instance @container.setter - def container(self, instance: docker.models.containers.Container): + def container(self, instance: docker.models.containers.Container) -> None: + """Set the Docker container for this function. + + Args: + instance: New Docker container instance + """ self._instance = instance @property def url(self) -> str: + """Get the URL for function invocation. + + Returns: + str: HTTP URL for invoking the function + """ return self._url @property def memory_measurement_pid(self) -> Optional[int]: + """Get the PID of the memory measurement process. 
+ + Returns: + Optional[int]: PID of memory measurement process, or None if not measuring + """ return self._measurement_pid @staticmethod def typename() -> str: + """Get the type name for this function. + + Returns: + str: Type name "Local.LocalFunction" + """ return "Local.LocalFunction" def serialize(self) -> dict: + """Serialize function configuration to dictionary. + + Returns: + dict: Dictionary containing function configuration including container details + """ return { **super().serialize(), "instance_id": self._instance_id, @@ -102,6 +223,17 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LocalFunction": + """Deserialize function from cached configuration. + + Args: + cached_config: Dictionary containing cached function configuration + + Returns: + LocalFunction: Deserialized function instance + + Raises: + RuntimeError: If cached container is no longer available + """ try: instance_id = cached_config["instance_id"] instance = docker.from_env().containers.get(instance_id) @@ -117,7 +249,11 @@ def deserialize(cached_config: dict) -> "LocalFunction": except docker.errors.NotFound: raise RuntimeError(f"Cached container {instance_id} not available anymore!") - def stop(self): + def stop(self) -> None: + """Stop the function container. + + Stops the Docker container running this function with immediate timeout. + """ self.logging.info(f"Stopping function container {self._instance_id}") self._instance.stop(timeout=0) self.logging.info(f"Function container {self._instance_id} stopped succesfully") diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ff..643918a0 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -1,3 +1,20 @@ +"""Local execution platform for SeBS. + +It runs serverless functions locally using Docker containers, providing a +development and testing environment that mimics serverless execution without requiring +cloud platform deployment. + +The local platform provides: +- Docker-based function execution +- HTTP triggers for function invocation +- Memory profiling and measurement capabilities +- Port management for multiple concurrent functions +- Cross-platform support (Linux, macOS, Windows) + +Key Classes: + Local: Main system class implementing the local execution platform +""" + import os import requests import shutil @@ -21,43 +38,97 @@ class Local(System): + """Local execution platform implementation. + + Attributes: + DEFAULT_PORT: Default port number for function containers (9000) + _config: Local platform configuration + _remove_containers: Whether to automatically remove containers after use + _memory_measurement_path: Path to memory measurement file + _measure_interval: Interval for memory measurements (-1 disables) + """ DEFAULT_PORT = 9000 @staticmethod - def name(): + def name() -> str: + """Get the platform name. + + Returns: + str: Platform name "local" + """ return "local" @staticmethod - def typename(): + def typename() -> str: + """Get the platform type name. + + Returns: + str: Type name "Local" + """ return "Local" @staticmethod def function_type() -> "Type[Function]": + """Get the function type for this platform. + + Returns: + Type[Function]: LocalFunction class + """ return LocalFunction @property def config(self) -> LocalConfig: + """Get the local platform configuration. + + Returns: + LocalConfig: The platform configuration + """ return self._config @property def remove_containers(self) -> bool: + """Get whether containers are automatically removed. 
+ + Returns: + bool: True if containers are removed after use + """ return self._remove_containers @remove_containers.setter - def remove_containers(self, val: bool): + def remove_containers(self, val: bool) -> None: + """Set whether containers are automatically removed. + + Args: + val: Whether to remove containers after use + """ self._remove_containers = val @property def measure_interval(self) -> int: + """Get the memory measurement interval. + + Returns: + int: Measurement interval in milliseconds, -1 if disabled + """ return self._measure_interval @property def measurements_enabled(self) -> bool: + """Check if memory measurements are enabled. + + Returns: + bool: True if measurements are enabled + """ return self._measure_interval > -1 @property def measurement_path(self) -> Optional[str]: + """Get the path to the memory measurement file. + + Returns: + Optional[str]: Path to measurement file, or None if not set + """ return self._memory_measurement_path def __init__( @@ -68,6 +139,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """Initialize the local execution platform. + + Args: + sebs_config: Global SeBS configuration + config: Local platform configuration + cache_client: Cache client for storing artifacts + docker_client: Docker client for container management + logger_handlers: Logging handlers for output + """ super().__init__( sebs_config, cache_client, @@ -85,31 +165,13 @@ def __init__( self.initialize_resources(select_prefix="local") - """ - Shut down minio storage instance. - """ + def shutdown(self) -> None: + """Shut down the local platform. - def shutdown(self): + Performs cleanup operations including shutting down any storage instances. + """ super().shutdown() - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module. - - Structure: - function - - function.py - - storage.py - - resources - handler.py - - dir: directory where code is located - benchmark: benchmark name - """ - def package_code( self, directory: str, @@ -120,6 +182,31 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """Package function code for local execution. + + Creates a compatible code package structure for local execution that + maintains compatibility across cloud providers. Reorganizes files into + a module structure to handle relative imports properly. + + The packaging creates this structure: + - function/ + - function.py + - storage.py + - resources/ + - handler.py + + Args: + directory: Directory containing the function code + language_name: Programming language (e.g., "python", "nodejs") + language_version: Language version (e.g., "3.8", "14") + architecture: Target architecture (unused for local) + benchmark: Benchmark name + is_cached: Whether the package is from cache + container_deployment: Whether using container deployment + + Returns: + Tuple[str, int, str]: (package_path, size_bytes, deployment_package_uri) + """ CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], @@ -143,6 +230,23 @@ def package_code( def _start_container( self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] ) -> LocalFunction: + """Start a Docker container for function execution. 
+ + Creates and starts a Docker container running the function code. Handles + port allocation, environment setup, volume mounting, and health checking. + Optionally starts memory measurement processes. + + Args: + code_package: Benchmark code package to deploy + func_name: Name of the function + func: Optional existing function to update (for restarts) + + Returns: + LocalFunction: Running function instance + + Raises: + RuntimeError: If port allocation fails or container won't start + """ container_name = "{}:run.local.{}.{}".format( self._system_config.docker_repository(), @@ -286,33 +390,61 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LocalFunction": + """Create a new function deployment. In practice, it starts a new Docker container. + + Args: + code_package: Benchmark code package to deploy + func_name: Name for the function + container_deployment: Whether to use container deployment (unsupported) + container_uri: Container URI (unused for local) + Returns: + LocalFunction: Created function instance + + Raises: + NotImplementedError: If container deployment is requested + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Local") return self._start_container(code_package, func_name, None) - """ - Restart Docker container - """ - def update_function( self, function: Function, code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """Update an existing function with new code. + + Stops the existing container and starts a new one with updated code. + + Args: + function: Existing function to update + code_package: New benchmark code package + container_deployment: Whether to use container deployment (unused) + container_uri: Container URI (unused) + """ func = cast(LocalFunction, function) func.stop() self.logging.info("Allocating a new function container with updated code") self._start_container(code_package, function.name, func) - """ - For local functions, we don't need to do anything for a cached function. - There's only one trigger - HTTP. - """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """Create a trigger for function invocation. + + For local functions, only HTTP triggers are supported. + + Args: + func: Function to create trigger for + trigger_type: Type of trigger to create + + Returns: + Trigger: Created trigger instance + + Raises: + RuntimeError: If trigger type is not HTTP + """ from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) @@ -326,10 +458,26 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T self.cache_client.update_function(function) return trigger - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """Handle cached function setup. + + For local functions, no special handling is needed for cached functions. + + Args: + function: Cached function instance + """ pass - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """Update function configuration. 
+ + Args: + function: Function to update + code_package: Benchmark code package + + Raises: + RuntimeError: Always raised as configuration updates are not supported + """ self.logging.error("Updating function configuration of local deployment is not supported") raise RuntimeError("Updating function configuration of local deployment is not supported") @@ -340,16 +488,47 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """Download execution metrics. + + For local execution, metrics are not available from the platform. + + Args: + function_name: Name of the function + start_time: Start time for metrics collection + end_time: End time for metrics collection + requests: Execution requests to collect metrics for + metrics: Dictionary to store collected metrics + """ pass - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """Enforce cold start for functions. + + Args: + functions: List of functions to enforce cold start on + code_package: Benchmark code package + + Raises: + NotImplementedError: Cold start enforcement is not implemented for local + """ raise NotImplementedError() @staticmethod def default_function_name( code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """Generate default function name. + + Creates a standardized function name based on the code package and resources. + + Args: + code_package: Benchmark code package + resources: Optional resources instance for ID inclusion + + Returns: + str: Generated function name + """ # Create function name if resources is not None: func_name = "sebs-{}-{}-{}-{}".format( @@ -368,10 +547,30 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """Format function name for platform requirements. + + For local execution, no formatting is needed. + + Args: + func_name: Function name to format + + Returns: + str: Formatted function name (unchanged for local) + """ return func_name def start_measurements(self, measure_interval: int) -> Optional[str]: + """Start memory measurements for function containers. + + Creates a temporary file for storing memory measurements and enables + measurement collection at the specified interval. + + Args: + measure_interval: Measurement interval in milliseconds + Returns: + Optional[str]: Path to measurement file, or None if measurements disabled + """ self._measure_interval = measure_interval if not self.measurements_enabled: diff --git a/sebs/local/measureMem.py b/sebs/local/measureMem.py index 74cae636..691a54be 100644 --- a/sebs/local/measureMem.py +++ b/sebs/local/measureMem.py @@ -1,9 +1,20 @@ -""" -Measure memory consumption of a specified docker container. +"""Memory measurement utility for Docker containers. + +This script periodically reads the `memory.current` file from the container's +cgroup to record its memory usage. The measurements +are appended to a specified output file. + +The measurement process: +1. Reads memory.current from the container's cgroup +2. Records the measurement with container ID and timestamp +3. Tracks precision errors when measurement intervals are exceeded +4. Continues until the container stops or process is terminated -Specifically, the pseudofile memory.current from the cgroup -pseudo-filesystem is read by a shell command (cat) every few -milliseconds while the container is running. 
+Functions: + measure: Main measurement function that continuously monitors container memory + +Usage: + python measureMem.py --container-id --measure-interval --measurement-file """ import subprocess @@ -12,7 +23,22 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> None: + """Continuously measure memory consumption of a Docker container. + + Reads memory usage from the container's cgroup filesystem at regular intervals + and writes measurements to the specified file. Handles different cgroup paths + for compatibility with various Docker configurations. + Args: + container_id: Docker container ID to monitor + measure_interval: Measurement interval in milliseconds + measurement_file: Path to file for writing measurements + + Note: + This function runs indefinitely until the process is terminated. + It attempts two different cgroup paths to accommodate different + Docker/systemd configurations. + """ f = open(measurement_file, "a") while True: @@ -33,13 +59,23 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> time.sleep(max(0, (measure_interval - iter_duration / 1e6) / 1000)) -""" - Parse container ID and measure interval and start memory measurement process. -""" if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--container-id", type=str) - parser.add_argument("--measurement-file", type=str) - parser.add_argument("--measure-interval", type=int) + """Parse command line arguments and start memory measurement process. + + Command line arguments: + --container-id: Docker container ID to monitor + --measurement-file: Path to file for writing measurements + --measure-interval: Measurement interval in milliseconds + """ + parser = argparse.ArgumentParser(description="Measure memory consumption of a Docker container") + parser.add_argument( + "--container-id", type=str, required=True, help="Docker container ID to monitor" + ) + parser.add_argument( + "--measurement-file", type=str, required=True, help="Path to file for writing measurements" + ) + parser.add_argument( + "--measure-interval", type=int, required=True, help="Measurement interval in milliseconds" + ) args, unknown = parser.parse_known_args() measure(args.container_id, args.measure_interval, args.measurement_file) diff --git a/sebs/openwhisk/__init__.py b/sebs/openwhisk/__init__.py index 614d9443..b25f6488 100644 --- a/sebs/openwhisk/__init__.py +++ b/sebs/openwhisk/__init__.py @@ -1,2 +1,24 @@ +"""Apache OpenWhisk integration module for SeBS. 
+ +This module provides the complete OpenWhisk integration: +- OpenWhisk system and function management +- Configuration classes for credentials and resources +- Function and trigger implementations +- Docker container management +- CLI and HTTP-based invocation methods + +Main Classes: + OpenWhisk: Main OpenWhisk system implementation + OpenWhiskConfig: Configuration management for OpenWhisk deployments + OpenWhiskFunction: OpenWhisk-specific function implementation + LibraryTrigger: CLI-based function invocation + HTTPTrigger: HTTP-based function invocation + +Example: + >>> from sebs.openwhisk import OpenWhisk, OpenWhiskConfig + >>> config = OpenWhiskConfig.deserialize(config_dict, cache, handlers) + >>> system = OpenWhisk(sys_config, config, cache, docker_client, handlers) +""" + from .openwhisk import OpenWhisk # noqa from .config import OpenWhiskConfig # noqa diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index bba54f7c..90a7cac0 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -1,30 +1,92 @@ +""" +Configuration management for Apache OpenWhisk deployments in SeBS. + +It handles Docker registry configuration, storage settings, +and deployment parameters for OpenWhisk serverless functions. + +Classes: + OpenWhiskCredentials: Manages authentication credentials for OpenWhisk + OpenWhiskResources: Handles Docker registry and storage resources + OpenWhiskConfig: Main configuration class for OpenWhisk deployment settings +""" + from __future__ import annotations +from typing import Optional, cast, Dict, Any + from sebs.cache import Cache from sebs.faas.config import Credentials, Resources, Config from sebs.utils import LoggingHandlers from sebs.storage.resources import SelfHostedResources -from typing import cast, Optional - class OpenWhiskCredentials(Credentials): + """ + Manages authentication credentials for OpenWhisk deployments. + + Since we do not use extra credentials there, it just implements + the expected interface. + + Note: + OpenWhisk deployments typically rely on local authentication through + the wsk CLI tool rather than explicit credential management. + """ + @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Credentials: + """ + Deserialize OpenWhisk credentials from configuration. + + Args: + config: Configuration dictionary containing credential data + cache: Cache instance for storing/retrieving cached credentials + handlers: Logging handlers for credential operations + + Returns: + OpenWhiskCredentials instance (currently empty) + """ return OpenWhiskCredentials() - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize credentials to dictionary format. + + Returns: + Empty dictionary as OpenWhisk uses CLI-based authentication + """ return {} class OpenWhiskResources(SelfHostedResources): + """ + Manages Docker registry and storage resources for OpenWhisk deployments. + + This class handles configuration of Docker registry. 
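+
+    The registry settings are read from the `docker_registry` section of the
+    user configuration (keys `registry`, `username`, `password`); cached values
+    are reused when no new section is provided. An illustrative example::
+
+        "docker_registry": {"registry": "registry.local:5000",
+                            "username": "user", "password": "secret"}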
+ + Attributes: + _docker_registry: Docker registry URL for storing function images + _docker_username: Username for Docker registry authentication + _docker_password: Password for Docker registry authentication + _registry_updated: Flag indicating if registry configuration has been updated + _storage_updated: Flag indicating if storage configuration has been updated + """ + def __init__( self, registry: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, registry_updated: bool = False, - ): + ) -> None: + """ + Initialize OpenWhisk resources configuration. + + Args: + registry: Docker registry URL for storing function images + username: Username for Docker registry authentication + password: Password for Docker registry authentication + registry_updated: Whether registry configuration has been updated + """ super().__init__(name="openwhisk") self._docker_registry = registry if registry != "" else None self._docker_username = username if username != "" else None @@ -34,38 +96,95 @@ def __init__( @staticmethod def typename() -> str: + """ + Get the type name for this resource class. + + Returns: + String identifier for OpenWhisk resources + """ return "OpenWhisk.Resources" @property def docker_registry(self) -> Optional[str]: + """ + Get the Docker registry URL. + + Returns: + Docker registry URL or None if not configured + """ return self._docker_registry @property def docker_username(self) -> Optional[str]: + """ + Get the Docker registry username. + + Returns: + Docker registry username or None if not configured + """ return self._docker_username @property def docker_password(self) -> Optional[str]: + """ + Get the Docker registry password. + + Returns: + Docker registry password or None if not configured + """ return self._docker_password @property def storage_updated(self) -> bool: + """ + Check if storage configuration has been updated. + + Returns: + True if storage configuration has been updated, False otherwise + """ return self._storage_updated @property def registry_updated(self) -> bool: + """ + Check if registry configuration has been updated. + + Returns: + True if registry configuration has been updated, False otherwise + """ return self._registry_updated @staticmethod - def initialize(res: Resources, dct: dict): + def initialize(res: Resources, dct: Dict[str, Any]) -> None: + """ + Initialize OpenWhisk resources from dictionary configuration. + + Args: + res: Resources instance to initialize + dct: Dictionary containing Docker registry configuration + Expected keys: 'registry', 'username', 'password' + """ ret = cast(OpenWhiskResources, res) ret._docker_registry = dct["registry"] ret._docker_username = dct["username"] ret._docker_password = dct["password"] @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Resources: + """ + Deserialize OpenWhisk resources from configuration. + This method handles both user-provided configuration and cached values, + prioritizing user configuration while detecting updates. 
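+        Detected changes are reflected in the `registry_updated` and
+        `storage_updated` flags.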
+ + Args: + config: Configuration dictionary that may contain 'docker_registry' section + cache: Cache instance to retrieve/store configuration + handlers: Logging handlers for resource operations + + Returns: + OpenWhiskResources instance with appropriate configuration + """ cached_config = cache.get_config("openwhisk") ret = OpenWhiskResources() if cached_config: @@ -73,7 +192,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour ret, cached_config["resources"] ) - ret._deserialize(ret, config, cached_config) + ret._deserialize(ret, config, cached_config or {}) # Check for new config - overrides but check if it's different if "docker_registry" in config: @@ -108,7 +227,13 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour return ret - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """ + Update cache with current resource configuration. + + Args: + cache: Cache instance to update with current configuration + """ super().update_cache(cache) cache.update_config( val=self.docker_registry, keys=["openwhisk", "resources", "docker", "registry"] @@ -120,8 +245,15 @@ def update_cache(self, cache: Cache): val=self.docker_password, keys=["openwhisk", "resources", "docker", "password"] ) - def serialize(self) -> dict: - out: dict = { + def serialize(self) -> Dict[str, Any]: + """ + Serialize resource configuration to dictionary. + + Returns: + Dictionary containing all resource configuration including + Docker registry settings and inherited storage configuration + """ + out: Dict[str, Any] = { **super().serialize(), "docker_registry": self.docker_registry, "docker_username": self.docker_username, @@ -131,34 +263,94 @@ def serialize(self) -> dict: class OpenWhiskConfig(Config): + """ + Main configuration class for OpenWhisk deployments. + + This class focuses on OpenWhisk-specific configuration settings: + cluster management, WSK CLI settings, and experimental features. + + Attributes: + name: Platform name identifier ('openwhisk') + shutdownStorage: Whether to shutdown storage after experiments + removeCluster: Whether to remove cluster after experiments + wsk_exec: Path to WSK CLI executable + wsk_bypass_security: Whether to bypass security checks in WSK CLI + experimentalManifest: Whether to use experimental manifest features + cache: Cache instance for configuration persistence + _credentials: OpenWhisk credentials configuration + _resources: OpenWhisk resources configuration + """ + name: str shutdownStorage: bool + removeCluster: bool + wsk_exec: str + wsk_bypass_security: bool + experimentalManifest: bool cache: Cache - def __init__(self, config: dict, cache: Cache): + def __init__( + self, resources: OpenWhiskResources, credentials: OpenWhiskCredentials, cache: Cache + ) -> None: + """ + Initialize OpenWhisk configuration. 
+ + Args: + config: Configuration dictionary containing OpenWhisk settings + cache: Cache instance for configuration persistence + """ super().__init__(name="openwhisk") - self._credentials = OpenWhiskCredentials() - self._resources = OpenWhiskResources() - self.shutdownStorage = config["shutdownStorage"] - self.removeCluster = config["removeCluster"] - self.wsk_exec = config["wskExec"] - self.wsk_bypass_security = config["wskBypassSecurity"] - self.experimentalManifest = config["experimentalManifest"] + self._credentials = credentials + self._resources = resources self.cache = cache @property def credentials(self) -> OpenWhiskCredentials: + """ + Get OpenWhisk credentials configuration. + + Returns: + OpenWhiskCredentials instance + """ return self._credentials @property def resources(self) -> OpenWhiskResources: + """ + Get OpenWhisk resources configuration. + + Returns: + OpenWhiskResources instance + """ return self._resources @staticmethod - def initialize(cfg: Config, dct: dict): - pass - - def serialize(self) -> dict: + def initialize(cfg: Config, dct: Dict[str, Any]) -> None: + """ + Initialize configuration from dictionary. + + Args: + cfg: Configuration instance to initialize + dct: Dictionary containing initialization data + """ + + config = cast(OpenWhiskConfig, cfg) + config._region = dct["region"] + + config.shutdownStorage = dct["shutdownStorage"] + config.removeCluster = dct["removeCluster"] + config.wsk_exec = dct["wskExec"] + config.wsk_bypass_security = dct["wskBypassSecurity"] + config.experimentalManifest = dct["experimentalManifest"] + + def serialize(self) -> Dict[str, Any]: + """ + Serialize configuration to dictionary format. + + Returns: + Dictionary containing all OpenWhisk configuration settings + including credentials and resources + """ return { "name": "openwhisk", "shutdownStorage": self.shutdownStorage, @@ -171,18 +363,43 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: - cached_config = cache.get_config("openwhisk") + def deserialize(config: Dict[str, Any], cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize OpenWhisk configuration from dictionary and cache. + + Args: + config: Configuration dictionary containing OpenWhisk settings + cache: Cache instance to retrieve cached configuration + handlers: Logging handlers for configuration operations + + Returns: + OpenWhiskConfig instance with deserialized configuration + """ resources = cast( OpenWhiskResources, OpenWhiskResources.deserialize(config, cache, handlers) ) - res = OpenWhiskConfig(config, cached_config) + res = OpenWhiskConfig(resources, OpenWhiskCredentials(), cache) res.logging_handlers = handlers - res._resources = resources + + cached_config = cache.get_config("openwhisk") + + if cached_config: + res.logging.info("Loading cached config for OpenWhisk") + OpenWhiskConfig.initialize(res, cached_config) + else: + res.logging.info("Using user-provided config for GCP") + OpenWhiskConfig.initialize(res, config) + return res - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """ + Update cache with current configuration values. 
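+
+        Values are stored under the `openwhisk` section of the cache, e.g.
+        under keys such as ["openwhisk", "wskExec"].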
+ + Args: + cache: Cache instance to update with current configuration + """ cache.update_config(val=self.shutdownStorage, keys=["openwhisk", "shutdownStorage"]) cache.update_config(val=self.removeCluster, keys=["openwhisk", "removeCluster"]) cache.update_config(val=self.wsk_exec, keys=["openwhisk", "wskExec"]) diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index 2dd27717..a684c0db 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -1,3 +1,13 @@ +"""Docker container management for OpenWhisk functions in SeBS. + +Its primary focus is supporting both DockerHub and custom, local Docker registries. +The latter make development and prototyping much faster and easier. +They also allow users to push new images. + +Classes: + OpenWhiskContainer: OpenWhisk-specific Docker container management +""" + import docker from typing import Tuple @@ -7,12 +17,39 @@ class OpenWhiskContainer(DockerContainer): + """ + OpenWhisk-specific Docker container management. + + Attributes: + config: OpenWhisk configuration containing registry settings + + Example: + >>> container = OpenWhiskContainer( + ... sys_config, ow_config, docker_client, True + ... ) + >>> registry, repo, tag, uri = container.registry_name( + ... "benchmark", "python", "3.8", "x86_64" + ... ) + """ + @staticmethod def name() -> str: + """ + Get the platform name identifier. + + Returns: + Platform name as string + """ return "openwhisk" @staticmethod def typename() -> str: + """ + Get the container type name. + + Returns: + Container type name as string + """ return "OpenWhisk.Container" def __init__( @@ -21,14 +58,42 @@ def __init__( config: OpenWhiskConfig, docker_client: docker.client, experimental_manifest: bool, - ): + ) -> None: + """ + Initialize OpenWhisk container manager. + + Args: + system_config: Global SeBS system configuration + config: OpenWhisk-specific configuration settings + docker_client: Docker client for container operations + experimental_manifest: Whether to use experimental manifest features + """ super().__init__(system_config, docker_client, experimental_manifest) self.config = config def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """ + Generate Docker registry information for a benchmark image. + + This method creates the appropriate registry name, repository name, image tag, + and complete image URI based on the benchmark parameters and OpenWhisk + configuration. It handles both custom registries and Docker Hub. + + Args: + benchmark: Name of the benchmark + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x86_64') + Returns: + Tuple containing: + - Registry name (e.g., "my-registry.com" or "Docker Hub") + - Full repository name with registry prefix + - Image tag + - Complete image URI + """ registry_name = self.config.resources.docker_registry # We need to retag created images when pushing to registry other diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py index daf851ca..d0f0a211 100644 --- a/sebs/openwhisk/function.py +++ b/sebs/openwhisk/function.py @@ -1,6 +1,13 @@ +"""OpenWhisk function and configuration classes for SeBS. 
+ +Classes: + OpenWhiskFunctionConfig: Configuration data class for OpenWhisk functions + OpenWhiskFunction: OpenWhisk-specific function implementation +""" + from __future__ import annotations -from typing import cast, Optional +from typing import cast, Optional, Dict, Any from dataclasses import dataclass from sebs.benchmark import Benchmark @@ -10,15 +17,41 @@ @dataclass class OpenWhiskFunctionConfig(FunctionConfig): + """ + Configuration data class for OpenWhisk functions. + + This class extends the base FunctionConfig to include OpenWhisk-specific + configuration parameters such as Docker image information, namespace settings, + and storage configurations for both object and NoSQL storage. + + Attributes: + docker_image: Docker image URI used for the function deployment + namespace: OpenWhisk namespace (default: "_" for default namespace) + object_storage: Minio object storage configuration if required + nosql_storage: ScyllaDB NoSQL storage configuration if required + + Note: + The docker_image attribute should be merged with higher-level + image abstraction in future refactoring. This is quite similar + to AWS deployments. + """ - # FIXME: merge with higher level abstraction for images docker_image: str = "" namespace: str = "_" object_storage: Optional[MinioConfig] = None nosql_storage: Optional[ScyllaDBConfig] = None @staticmethod - def deserialize(data: dict) -> OpenWhiskFunctionConfig: + def deserialize(data: Dict[str, Any]) -> OpenWhiskFunctionConfig: + """ + Deserialize configuration from dictionary data. + + Args: + data: Dictionary containing serialized configuration data + + Returns: + OpenWhiskFunctionConfig instance with deserialized data + """ keys = list(OpenWhiskFunctionConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} data["runtime"] = Runtime.deserialize(data["runtime"]) @@ -26,41 +59,117 @@ def deserialize(data: dict) -> OpenWhiskFunctionConfig: data["nosql_storage"] = ScyllaDBConfig.deserialize(data["nosql_storage"]) return OpenWhiskFunctionConfig(**data) - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize configuration to dictionary format. + + Returns: + Dictionary containing all configuration data + """ return self.__dict__ @staticmethod def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: + """ + Create configuration from benchmark specification. + + Args: + benchmark: Benchmark instance containing configuration requirements + + Returns: + OpenWhiskFunctionConfig instance initialized from benchmark + """ return super(OpenWhiskFunctionConfig, OpenWhiskFunctionConfig)._from_benchmark( benchmark, OpenWhiskFunctionConfig ) class OpenWhiskFunction(Function): + """ + OpenWhisk-specific function implementation for SeBS. + + It does not implemnet anything non-standard, just implements + trigger and config types specific to OpenWhisk. + + Attributes: + _cfg: OpenWhisk-specific function configuration + + Example: + >>> config = OpenWhiskFunctionConfig.from_benchmark(benchmark) + >>> function = OpenWhiskFunction("test-func", "benchmark-name", "hash123", config) + """ + def __init__( - self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig - ): + self, + name: str, + benchmark: str, + code_package_hash: str, + cfg: OpenWhiskFunctionConfig, + ) -> None: + """ + Initialize OpenWhisk function. 
+ + Args: + name: Function name (OpenWhisk action name) + benchmark: Name of the benchmark this function implements + code_package_hash: Hash of the code package for cache validation + cfg: OpenWhisk-specific function configuration + """ super().__init__(benchmark, name, code_package_hash, cfg) @property def config(self) -> OpenWhiskFunctionConfig: + """ + Get OpenWhisk-specific function configuration. + + Returns: + OpenWhiskFunctionConfig instance with current settings + """ return cast(OpenWhiskFunctionConfig, self._cfg) @staticmethod def typename() -> str: + """ + Get the type name for this function class. + + Returns: + String identifier for OpenWhisk functions + """ return "OpenWhisk.Function" - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """ + Serialize function to dictionary format. + + Returns: + Dictionary containing function data and OpenWhisk-specific configuration + """ return {**super().serialize(), "config": self._cfg.serialize()} @staticmethod - def deserialize(cached_config: dict) -> OpenWhiskFunction: + def deserialize(cached_config: Dict[str, Any]) -> OpenWhiskFunction: + """ + Deserialize function from cached configuration data. + + Args: + cached_config: Dictionary containing cached function configuration + and trigger information + + Returns: + OpenWhiskFunction instance with deserialized configuration and triggers + + Raises: + AssertionError: If unknown trigger type is encountered + """ from sebs.faas.function import Trigger from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger cfg = OpenWhiskFunctionConfig.deserialize(cached_config["config"]) ret = OpenWhiskFunction( - cached_config["name"], cached_config["benchmark"], cached_config["hash"], cfg + cached_config["name"], + cached_config["benchmark"], + cached_config["hash"], + cfg, ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 9c196fe2..a32357c6 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -1,3 +1,11 @@ +""" +Apache OpenWhisk serverless platform implementation for SeBS. + +This module provides the main OpenWhisk system class that integrates OpenWhisk +serverless platform with the SeBS benchmarking framework. It handles function +deployment, execution, monitoring, and resource management for OpenWhisk clusters. +""" + import os import subprocess from typing import cast, Dict, List, Optional, Tuple, Type @@ -21,6 +29,26 @@ class OpenWhisk(System): + """ + Apache OpenWhisk serverless platform implementation for SeBS. + + This class provides the main integration between SeBS and Apache OpenWhisk, + handling function deployment, execution, container management, and resource + management (primarily self-hosted storage like Minio/ScyllaDB via SelfHostedSystemResources), + and interaction with the `wsk` CLI. + It supports OpenWhisk deployments with Docker-based function packaging. + We do not use code packages due to low package size limits. 
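+    Functions are therefore shipped as Docker images; only a thin archive
+    containing the handler is registered through the `wsk` CLI.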
+ + Attributes: + _config: OpenWhisk-specific configuration settings + container_client: Docker container client for function packaging + logging_handlers: Logging handlers for the OpenWhisk system + + Example: + >>> openwhisk = OpenWhisk(sys_config, ow_config, cache, docker_client, handlers) + >>> function = openwhisk.create_function(benchmark, "test-func", True, "image:tag") + """ + _config: OpenWhiskConfig def __init__( @@ -30,7 +58,18 @@ def __init__( cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, - ): + ) -> None: + """ + Initialize OpenWhisk system with configuration and clients. + Will log in to Docker registry. + + Args: + system_config: Global SeBS system configuration + config: OpenWhisk-specific configuration settings + cache_client: Cache client for storing function and resource data + docker_client: Docker client for container operations + logger_handlers: Logging handlers for system operations + """ super().__init__( system_config, cache_client, @@ -59,14 +98,35 @@ def __init__( password=self.config.resources.docker_password, ) - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + def initialize( + self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None + ) -> None: + """ + Initialize OpenWhisk system resources. + + Args: + config: Additional configuration parameters (currently unused) + resource_prefix: Optional prefix for resource naming + """ self.initialize_resources(select_prefix=resource_prefix) @property def config(self) -> OpenWhiskConfig: + """ + Get OpenWhisk configuration. + + Returns: + OpenWhisk configuration instance + """ return self._config def shutdown(self) -> None: + """ + Shutdown OpenWhisk system and clean up resources. + + This method stops storage services if configured and optionally + removes the OpenWhisk cluster based on configuration settings. + """ if hasattr(self, "storage") and self.config.shutdownStorage: self.storage.stop() if self.config.removeCluster: @@ -77,17 +137,41 @@ def shutdown(self) -> None: @staticmethod def name() -> str: + """ + Get the platform name identifier. + + Returns: + Platform name as string + """ return "openwhisk" @staticmethod - def typename(): + def typename() -> str: + """ + Get the platform type name. + + Returns: + Platform type name as string + """ return "OpenWhisk" @staticmethod def function_type() -> "Type[Function]": + """ + Get the function type for this platform. + + Returns: + OpenWhiskFunction class type + """ return OpenWhiskFunction def get_wsk_cmd(self) -> List[str]: + """ + Get the WSK CLI command with appropriate flags. + + Returns: + List of command arguments for WSK CLI execution + """ cmd = [self.config.wsk_exec] if self.config.wsk_bypass_security: cmd.append("-i") @@ -103,6 +187,33 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Package benchmark code for OpenWhisk deployment. + + Creates both a Docker image and a ZIP archive containing the benchmark code. + The ZIP archive is required for OpenWhisk function registration even when + using Docker-based deployment. It contains only the main handlers + (`__main__.py` or `index.js`). The Docker image URI is returned, + which will be used when creating the action. 
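+
+        The artifacts are later registered through the `wsk` CLI, roughly
+        equivalent to (illustrative)::
+
+            wsk -i action create <name> benchmark.zip --docker <image_uri>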
+ + Args: + directory: Path to the benchmark code directory + language_name: Programming language (e.g., 'python', 'nodejs') + language_version: Language version (e.g., '3.8', '14') + architecture: Target architecture (e.g., 'x86_64') + benchmark: Benchmark name + is_cached: Whether Docker image is already cached + container_deployment: Whether to use container-based deployment + + Returns: + Tuple containing: + - Path to created ZIP archive + - Size of ZIP archive in bytes + - Docker image URI + + Raises: + RuntimeError: If packaging fails + """ # Regardless of Docker image status, we need to create .zip file # to allow registration of function with OpenWhisk @@ -128,6 +239,18 @@ def package_code( return benchmark_archive, bytes_size, image_uri def storage_arguments(self, code_package: Benchmark) -> List[str]: + """ + Generate storage-related arguments for function deployment. + + Creates WSK CLI parameters for Minio object storage and ScyllaDB NoSQL + storage configurations based on the benchmark requirements. + + Args: + code_package: Benchmark configuration requiring storage access + + Returns: + List of WSK CLI parameter arguments for storage configuration + """ envs = [] if self.config.resources.storage_config: @@ -169,6 +292,25 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "OpenWhiskFunction": + """ + Create or retrieve an OpenWhisk function (action). + + This method checks if a function already exists and updates it if necessary, + or creates a new function with the appropriate configuration, storage settings, + and Docker image. + + Args: + code_package: Benchmark configuration and code package + func_name: Name for the OpenWhisk action + container_deployment: Whether to use container-based deployment + container_uri: URI of the Docker image for the function + + Returns: + OpenWhiskFunction instance configured with LibraryTrigger + + Raises: + RuntimeError: If WSK CLI is not accessible or function creation fails + """ self.logging.info("Creating function as an action in OpenWhisk.") try: actions = subprocess.run( @@ -254,7 +396,19 @@ def update_function( code_package: Benchmark, container_deployment: bool, container_uri: str, - ): + ) -> None: + """ + Update an existing OpenWhisk function with new code and configuration. + + Args: + function: Existing function to update + code_package: New benchmark configuration and code package + container_deployment: Whether to use container-based deployment + container_uri: URI of the new Docker image + + Raises: + RuntimeError: If WSK CLI is not accessible or update fails + """ self.logging.info(f"Update an existing OpenWhisk action {function.name}.") function = cast(OpenWhiskFunction, function) docker_image = self.system_config.benchmark_image_name( @@ -297,7 +451,20 @@ def update_function( self.logging.error(f"Output: {e.stderr.decode('utf-8')}") raise RuntimeError(e) - def update_function_configuration(self, function: Function, code_package: Benchmark): + def update_function_configuration(self, function: Function, code_package: Benchmark) -> None: + """ + Update configuration of an existing OpenWhisk function. + + Updates memory allocation, timeout, and storage parameters without + changing the function code or Docker image. 
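+
+        This corresponds roughly to a `wsk action update` call with new
+        `--memory` and `--timeout` values and refreshed storage parameters
+        (illustrative).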
+ + Args: + function: Function to update configuration for + code_package: New benchmark configuration settings + + Raises: + RuntimeError: If WSK CLI is not accessible or configuration update fails + """ self.logging.info(f"Update configuration of an existing OpenWhisk action {function.name}.") try: subprocess.run( @@ -326,6 +493,19 @@ def update_function_configuration(self, function: Function, code_package: Benchm raise RuntimeError(e) def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + """ + Check if function configuration has changed compared to cached version. + + Compares current benchmark configuration and storage settings with the + cached function configuration to determine if an update is needed. + + Args: + cached_function: Previously cached function configuration + benchmark: Current benchmark configuration to compare against + + Returns: + True if configuration has changed and function needs updating + """ changed = super().is_configuration_changed(cached_function, benchmark) storage = cast(Minio, self.system_resources.get_storage()) @@ -353,13 +533,33 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate default function name based on benchmark and resource configuration. + + Args: + code_package: Benchmark package containing name and language info + resources: Optional specific resources to use for naming + + Returns: + Generated function name string + """ resource_id = resources.resources_id if resources else self.config.resources.resources_id return ( f"sebs-{resource_id}-{code_package.benchmark}-" f"{code_package.language_name}-{code_package.language_version}" ) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) -> None: + """ + Enforce cold start for functions (not implemented for OpenWhisk). + + Args: + functions: List of functions to enforce cold start for + code_package: Benchmark package configuration + + Raises: + NotImplementedError: Cold start enforcement not implemented for OpenWhisk + """ raise NotImplementedError() def download_metrics( @@ -369,10 +569,36 @@ def download_metrics( end_time: int, requests: Dict[str, ExecutionResult], metrics: dict, - ): + ) -> None: + """ + Download metrics for function executions (no-op for OpenWhisk). + + Args: + function_name: Name of the function to download metrics for + start_time: Start time for metrics collection (epoch timestamp) + end_time: End time for metrics collection (epoch timestamp) + requests: Dictionary mapping request IDs to execution results + metrics: Dictionary to store downloaded metrics + + Note: + OpenWhisk metrics collection is not currently implemented. + """ pass def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for function invocation. 
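+
+        Library triggers are registered together with the function; HTTP
+        triggers expose the action as a web action and are created on demand
+        with the `wsk` CLI.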
+ + Args: + function: Function to create trigger for + trigger_type: Type of trigger to create (LIBRARY or HTTP) + + Returns: + Created trigger instance + + Raises: + RuntimeError: If WSK CLI is not accessible or trigger type not supported + """ if trigger_type == Trigger.TriggerType.LIBRARY: return function.triggers(Trigger.TriggerType.LIBRARY)[0] elif trigger_type == Trigger.TriggerType.HTTP: @@ -398,12 +624,26 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) else: raise RuntimeError("Not supported!") - def cached_function(self, function: Function): + def cached_function(self, function: Function) -> None: + """ + Configure a cached function with current system settings. + + Updates triggers with current logging handlers and WSK command configuration. + + Args: + function: Cached function to configure + """ for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).wsk_cmd = self.get_wsk_cmd() for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers - def disable_rich_output(self): + def disable_rich_output(self) -> None: + """ + Disable rich output formatting for container operations. + + This is useful for non-interactive environments or when plain text + output is preferred. + """ self.container_client.disable_rich_output = True diff --git a/sebs/openwhisk/triggers.py b/sebs/openwhisk/triggers.py index f0d8260b..de8bcb38 100644 --- a/sebs/openwhisk/triggers.py +++ b/sebs/openwhisk/triggers.py @@ -1,14 +1,47 @@ +"""Trigger implementations for OpenWhisk function invocation in SeBS. + +This module provides different trigger types for invoking OpenWhisk functions, +including library-based (CLI) triggers and HTTP-based triggers. + +Classes: + LibraryTrigger: CLI-based function invocation using wsk tool + HTTPTrigger: HTTP-based function invocation using web actions +""" + import concurrent.futures import datetime import json import subprocess -from typing import Dict, List, Optional # noqa +from typing import Dict, List, Optional, Any # noqa from sebs.faas.function import ExecutionResult, Trigger class LibraryTrigger(Trigger): - def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None): + """ + CLI-based trigger for OpenWhisk function invocation. + + This trigger uses the wsk CLI tool to invoke OpenWhisk actions directly, + providing synchronous and asynchronous invocation capabilities. It handles + parameter passing and result parsing for CLI-based invocations. + + Attributes: + fname: Name of the OpenWhisk action to invoke + _wsk_cmd: Complete WSK CLI command for function invocation + + Example: + >>> trigger = LibraryTrigger("my-function", ["wsk", "-i"]) + >>> result = trigger.sync_invoke({"key": "value"}) + """ + + def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None) -> None: + """ + Initialize library trigger for OpenWhisk function. + + Args: + fname: Name of the OpenWhisk action to invoke + wsk_cmd: Optional WSK CLI command prefix (including flags) + """ super().__init__() self.fname = fname if wsk_cmd: @@ -16,19 +49,53 @@ def __init__(self, fname: str, wsk_cmd: Optional[List[str]] = None): @staticmethod def trigger_type() -> "Trigger.TriggerType": + """ + Get the trigger type identifier. + + Returns: + TriggerType.LIBRARY for CLI-based invocation + """ return Trigger.TriggerType.LIBRARY @property def wsk_cmd(self) -> List[str]: + """ + Get the complete WSK CLI command for invocation. 
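+
+        For an action named `echo` this is, for example,
+        ["wsk", "-i", "action", "invoke", "--result", "echo"].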
+ + Returns: + List of command arguments for WSK CLI invocation + + Raises: + AssertionError: If wsk_cmd has not been set + """ assert self._wsk_cmd return self._wsk_cmd @wsk_cmd.setter - def wsk_cmd(self, wsk_cmd: List[str]): + def wsk_cmd(self, wsk_cmd: List[str]) -> None: + """ + Set the WSK CLI command prefix. + + Args: + wsk_cmd: WSK CLI command prefix (including any flags) + """ self._wsk_cmd = [*wsk_cmd, "action", "invoke", "--result", self.fname] @staticmethod - def get_command(payload: dict) -> List[str]: + def get_command(payload: Dict[str, Any]) -> List[str]: + """ + Convert payload dictionary to WSK CLI parameter arguments. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + List of CLI arguments for passing parameters to WSK + + Example: + >>> get_command({"key1": "value1", "key2": 42}) + ["--param", "key1", '"value1"', "--param", "key2", "42"] + """ params = [] for key, value in payload.items(): params.append("--param") @@ -36,7 +103,16 @@ def get_command(payload: dict) -> List[str]: params.append(json.dumps(value)) return params - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: + """ + Synchronously invoke the OpenWhisk function via CLI. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + ExecutionResult containing timing information and function output + """ command = self.wsk_cmd + self.get_command(payload) error = None try: @@ -63,49 +139,150 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: openwhisk_result.parse_benchmark_output(return_content) return openwhisk_result - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: + """ + Asynchronously invoke the OpenWhisk function via CLI. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + Future object that will contain the ExecutionResult + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """ + Serialize trigger configuration to dictionary. + + Returns: + Dictionary containing trigger type and function name + """ return {"type": "Library", "name": self.fname} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict[str, str]) -> Trigger: + """ + Deserialize trigger from configuration dictionary. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + LibraryTrigger instance + """ return LibraryTrigger(obj["name"]) @staticmethod def typename() -> str: + """ + Get the trigger type name. + + Returns: + String identifier for this trigger type + """ return "OpenWhisk.LibraryTrigger" class HTTPTrigger(Trigger): - def __init__(self, fname: str, url: str): + """ + HTTP-based trigger for OpenWhisk web action invocation. + + This trigger uses HTTP requests to invoke OpenWhisk web actions, + providing an alternative to CLI-based invocation. It inherits HTTP + invocation capabilities from the base Trigger class. + + Attributes: + fname: Name of the OpenWhisk action + url: HTTP URL for the web action endpoint + + Example: + >>> trigger = HTTPTrigger( + ... "my-function", + ... "https://openwhisk.example.com/api/v1/web/guest/default/my-function.json" + ... 
) + >>> result = trigger.sync_invoke({"key": "value"}) + """ + + def __init__(self, fname: str, url: str) -> None: + """ + Initialize HTTP trigger for OpenWhisk web action. + + Args: + fname: Name of the OpenWhisk action + url: HTTP URL for the web action endpoint + """ super().__init__() self.fname = fname self.url = url @staticmethod def typename() -> str: + """ + Get the trigger type name. + + Returns: + String identifier for this trigger type + """ return "OpenWhisk.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """ + Get the trigger type identifier. + + Returns: + TriggerType.HTTP for HTTP-based invocation + """ return Trigger.TriggerType.HTTP - def sync_invoke(self, payload: dict) -> ExecutionResult: + def sync_invoke(self, payload: Dict[str, Any]) -> ExecutionResult: + """ + Synchronously invoke the OpenWhisk function via HTTP. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + ExecutionResult containing timing information and function output + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url, False) - def async_invoke(self, payload: dict) -> concurrent.futures.Future: + def async_invoke(self, payload: Dict[str, Any]) -> concurrent.futures.Future: + """ + Asynchronously invoke the OpenWhisk function via HTTP. + + Args: + payload: Dictionary of parameters to pass to the function + + Returns: + Future object that will contain the ExecutionResult + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut - def serialize(self) -> dict: + def serialize(self) -> Dict[str, str]: + """ + Serialize trigger configuration to dictionary. + + Returns: + Dictionary containing trigger type, function name, and URL + """ return {"type": "HTTP", "fname": self.fname, "url": self.url} @staticmethod - def deserialize(obj: dict) -> Trigger: + def deserialize(obj: Dict[str, str]) -> Trigger: + """ + Deserialize trigger from configuration dictionary. + + Args: + obj: Dictionary containing serialized trigger data + + Returns: + HTTPTrigger instance + """ return HTTPTrigger(obj["fname"], obj["url"]) diff --git a/sebs/regression.py b/sebs/regression.py index 579760a1..6660e263 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -1,3 +1,21 @@ +"""Regression testing framework for serverless benchmarks across cloud providers. + +This module provides a flexible testing framework to validate benchmark functionality +across multiple cloud providers, runtimes, architectures, and deployment methods. +It automatically generates test cases for each valid combination and runs them +concurrently to efficiently validate the system. 
+ +The module supports: +- AWS Lambda +- Azure Functions +- Google Cloud Functions +- OpenWhisk +- Multiple runtime languages (Python, Node.js) +- Multiple architectures (x64, arm64) +- Different deployment types (package, container) +- Different trigger types (HTTP, library) +""" + import logging import os import unittest @@ -13,38 +31,56 @@ if TYPE_CHECKING: from sebs import SeBS +# List of Python benchmarks available for regression testing benchmarks_python = [ - "110.dynamic-html", - "120.uploader", - "130.crud-api", - "210.thumbnailer", - "220.video-processing", - "311.compression", - "411.image-recognition", - "501.graph-pagerank", - "502.graph-mst", - "503.graph-bfs", - "504.dna-visualisation", + "110.dynamic-html", # Dynamic HTML generation + "120.uploader", # File upload handling + "130.crud-api", # CRUD API implementation + "210.thumbnailer", # Image thumbnail generation + "220.video-processing", # Video processing + "311.compression", # Data compression + "411.image-recognition", # ML-based image recognition + "501.graph-pagerank", # Graph PageRank algorithm + "502.graph-mst", # Graph minimum spanning tree + "503.graph-bfs", # Graph breadth-first search + "504.dna-visualisation", # DNA visualization ] + +# List of Node.js benchmarks available for regression testing benchmarks_nodejs = ["110.dynamic-html", "120.uploader", "210.thumbnailer"] +# AWS-specific configurations architectures_aws = ["x64", "arm64"] deployments_aws = ["package", "container"] +# GCP-specific configurations architectures_gcp = ["x64"] deployments_gcp = ["package"] +# Azure-specific configurations architectures_azure = ["x64"] deployments_azure = ["package"] +# OpenWhisk-specific configurations architectures_openwhisk = ["x64"] deployments_openwhisk = ["container"] -# user-defined config passed during initialization +# User-defined config passed during initialization, set in regression_suite() cloud_config: Optional[dict] = None class TestSequenceMeta(type): + """Metaclass for dynamically generating regression test cases. + + This metaclass automatically generates test methods for all combinations of + benchmark, architecture, and deployment type. Each test method deploys and + executes a specific benchmark on a specific cloud provider with a specific + configuration. + + The generated tests follow a naming convention: + test_{provider}_{benchmark}_{architecture}_{deployment_type} + """ + def __init__( cls, name, @@ -56,6 +92,19 @@ def __init__( deployment_name, triggers, ): + """Initialize the test class with deployment information. + + Args: + cls: The class being created + name: The name of the class + bases: Base classes + attrs: Class attributes + benchmarks: List of benchmark names to test + architectures: List of architectures to test (e.g., x64, arm64) + deployments: List of deployment types to test (e.g., package, container) + deployment_name: Name of the cloud provider (e.g., aws, azure) + triggers: List of trigger types to test (e.g., HTTP, library) + """ type.__init__(cls, name, bases, attrs) cls.deployment_name = deployment_name cls.triggers = triggers @@ -71,16 +120,59 @@ def __new__( deployment_name, triggers, ): + """Create a new test class with dynamically generated test methods. 
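+
+        For example (illustrative), the benchmark "110.dynamic-html" tested on AWS
+        with the x64 architecture and package deployment produces a method named
+        test_aws_110.dynamic-html_x64_package.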
+ + Args: + mcs: The metaclass + name: The name of the class + bases: Base classes + dict: Class attributes dictionary + benchmarks: List of benchmark names to test + architectures: List of architectures to test + deployments: List of deployment types to test + deployment_name: Name of the cloud provider + triggers: List of trigger types to test + + Returns: + A new test class with dynamically generated test methods + """ + def gen_test(benchmark_name, architecture, deployment_type): + """Generate a test function for a specific benchmark configuration. + + Args: + benchmark_name: Name of the benchmark to test + architecture: Architecture to test on + deployment_type: Deployment type to use + + Returns: + A test function that deploys and executes the benchmark + """ + def test(self): + """Test function that deploys and executes a benchmark. + + This function: + 1. Sets up logging + 2. Gets a deployment client + 3. Configures the benchmark + 4. Deploys the function + 5. Invokes the function with different triggers + 6. Verifies the function execution + + Raises: + RuntimeError: If the benchmark execution fails + """ log_name = f"Regression-{deployment_name}-{benchmark_name}-{deployment_type}" logger = logging.getLogger(log_name) logger.setLevel(logging.INFO) logging_wrapper = ColoredWrapper(log_name, logger) + # Configure experiment settings self.experiment_config["architecture"] = architecture self.experiment_config["container_deployment"] = deployment_type == "container" + # Get deployment client for the specific cloud provider deployment_client = self.get_deployment( benchmark_name, architecture, deployment_type ) @@ -91,34 +183,37 @@ def test(self): f"Architecture {architecture}, deployment type: {deployment_type}." ) + # Get experiment configuration and deploy the benchmark experiment_config = self.client.get_experiment_config(self.experiment_config) - benchmark = self.client.get_benchmark( benchmark_name, deployment_client, experiment_config ) + + # Prepare input data for the benchmark input_config = benchmark.prepare_input( deployment_client.system_resources, size="test", replace_existing=experiment_config.update_storage, ) + + # Get or create the function func = deployment_client.get_function( benchmark, deployment_client.default_function_name(benchmark) ) + # Test each trigger type failure = False for trigger_type in triggers: if len(func.triggers(trigger_type)) > 0: trigger = func.triggers(trigger_type)[0] else: trigger = deployment_client.create_trigger(func, trigger_type) - """ - sleep 5 seconds - on some cloud systems the triggers might - not be available immediately. 
- for example, AWS tends to throw "not exist" on newly created - API gateway - """ + # Sleep to allow trigger creation to propagate + # Some cloud systems (e.g., AWS API Gateway) need time + # before the trigger is ready to use sleep(5) - # Synchronous invoke + + # Synchronous invoke to test function try: ret = trigger.sync_invoke(input_config) if ret.stats.failure: @@ -133,22 +228,27 @@ def test(self): except RuntimeError: failure = True logging_wrapper.error(f"{benchmark_name} fail on trigger: {trigger_type}") + + # Clean up resources deployment_client.shutdown() + + # Report overall test result if failure: raise RuntimeError(f"Test of {benchmark_name} failed!") return test + # Generate test methods for each combination for benchmark in benchmarks: for architecture in architectures: for deployment_type in deployments: - # for trigger in triggers: test_name = f"test_{deployment_name}_{benchmark}" test_name += f"_{architecture}_{deployment_type}" dict[test_name] = gen_test(benchmark, architecture, deployment_type) - dict["lock"] = threading.Lock() - dict["cfg"] = None + # Add shared resources + dict["lock"] = threading.Lock() # Lock for thread-safe initialization + dict["cfg"] = None # Shared configuration return type.__new__(mcs, name, bases, dict) @@ -161,20 +261,50 @@ class AWSTestSequencePython( deployment_name="aws", triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on AWS Lambda. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of AWS architectures to test (x64, arm64) + deployments: List of deployment types to test (package, container) + deployment_name: Cloud provider name ("aws") + triggers: List of trigger types to test (LIBRARY, HTTP) + """ + @property def typename(self) -> str: + """Get the type name of this test suite. + + Returns: + A string identifier for this test suite + """ return "AWSTestPython" def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an AWS deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64, arm64) + deployment_type: Deployment type (package, container) + + Returns: + An initialized AWS deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "aws" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + # Synchronize resource initialization with a lock with AWSTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -189,14 +319,41 @@ class AWSTestSequenceNodejs( deployment_name="aws", triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on AWS Lambda. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of AWS architectures to test (x64, arm64) + deployments: List of deployment types to test (package, container) + deployment_name: Cloud provider name ("aws") + triggers: List of trigger types to test (LIBRARY, HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an AWS deployment client for the specified configuration. 
+ + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64, arm64) + deployment_type: Deployment type (package, container) + + Returns: + An initialized AWS deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "aws" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create a log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with AWSTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -211,10 +368,40 @@ class AzureTestSequencePython( deployment_name="azure", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on Azure Functions. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of Azure architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("azure") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an Azure deployment client for the specified configuration. + + This method handles special Azure setup requirements, including: + - Caching deployment configuration to avoid recreating it for each test + - Initializing the Azure CLI for resource management + - Setting up system resources with proper authentication + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Azure deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "azure" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + with AzureTestSequencePython.lock: + # Cache the deployment configuration for reuse across tests if not AzureTestSequencePython.cfg: AzureTestSequencePython.cfg = self.client.get_deployment_config( cloud_config["deployment"], @@ -224,11 +411,13 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): ), ) + # Initialize Azure CLI if not already done if not hasattr(AzureTestSequencePython, "cli"): AzureTestSequencePython.cli = AzureCLI( self.client.config, self.client.docker_client ) + # Create log file name and get deployment client f = f"regression_{deployment_name}_{benchmark_name}_" f += f"{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( @@ -236,6 +425,8 @@ def get_deployment(self, benchmark_name, architecture, deployment_type): logging_filename=os.path.join(self.client.output_dir, f), deployment_config=AzureTestSequencePython.cfg, ) + + # Initialize CLI with login and setup resources deployment_client.system_resources.initialize_cli( cli=AzureTestSequencePython.cli, login=True ) @@ -252,28 +443,62 @@ class AzureTestSequenceNodejs( deployment_name="azure", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on Azure Functions. 
+ + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of Azure architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("azure") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an Azure deployment client for the specified configuration. + + This method handles special Azure setup requirements, including: + - Caching deployment configuration to avoid recreating it for each test + - Initializing the Azure CLI for resource management + - Setting up system resources + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Azure deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "azure" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + with AzureTestSequenceNodejs.lock: + # Cache the deployment configuration for reuse across tests if not AzureTestSequenceNodejs.cfg: AzureTestSequenceNodejs.cfg = self.client.get_deployment_config( cloud_config["deployment"], logging_filename=f"regression_{deployment_name}_{benchmark_name}.log", ) + # Initialize Azure CLI if not already done if not hasattr(AzureTestSequenceNodejs, "cli"): AzureTestSequenceNodejs.cli = AzureCLI( self.client.config, self.client.docker_client ) + # Create log file name and get deployment client f = f"regression_{deployment_name}_{benchmark_name}_" f += f"{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), - deployment_config=AzureTestSequencePython.cfg, + deployment_config=AzureTestSequencePython.cfg, # Note: This uses Python config ) + + # Initialize CLI and setup resources (no login needed - reuses Python session) deployment_client.system_resources.initialize_cli(cli=AzureTestSequenceNodejs.cli) deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -288,14 +513,41 @@ class GCPTestSequencePython( deployment_name="gcp", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on Google Cloud Functions. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of GCP architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("gcp") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get a GCP deployment client for the specified configuration. 
+ + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Google Cloud Functions deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "gcp" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with GCPTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -310,14 +562,41 @@ class GCPTestSequenceNodejs( deployment_name="gcp", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on Google Cloud Functions. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of GCP architectures to test (x64) + deployments: List of deployment types to test (package) + deployment_name: Cloud provider name ("gcp") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get a GCP deployment client for the specified configuration. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (package) + + Returns: + An initialized Google Cloud Functions deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "gcp" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( cloud_config, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with GCPTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -332,19 +611,50 @@ class OpenWhiskTestSequencePython( deployment_name="openwhisk", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Python benchmarks on OpenWhisk. + + Attributes: + benchmarks: List of Python benchmarks to test + architectures: List of OpenWhisk architectures to test (x64) + deployments: List of deployment types to test (container) + deployment_name: Cloud provider name ("openwhisk") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an OpenWhisk deployment client for the specified configuration. + + This method handles special OpenWhisk setup requirements, including + creating a modified configuration with architecture and deployment + type settings. 
+ + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (container) + + Returns: + An initialized OpenWhisk deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "openwhisk" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a copy of the config and set architecture and deployment type config_copy = cloud_config.copy() config_copy["experiments"]["architecture"] = architecture config_copy["experiments"]["container_deployment"] = deployment_type == "container" + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( config_copy, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with OpenWhiskTestSequencePython.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client @@ -359,52 +669,116 @@ class OpenWhiskTestSequenceNodejs( deployment_name="openwhisk", triggers=[Trigger.TriggerType.HTTP], ): + """Test suite for Node.js benchmarks on OpenWhisk. + + Attributes: + benchmarks: List of Node.js benchmarks to test + architectures: List of OpenWhisk architectures to test (x64) + deployments: List of deployment types to test (container) + deployment_name: Cloud provider name ("openwhisk") + triggers: List of trigger types to test (HTTP) + """ + def get_deployment(self, benchmark_name, architecture, deployment_type): + """Get an OpenWhisk deployment client for the specified configuration. + + This method handles special OpenWhisk setup requirements, including + creating a modified configuration with architecture and deployment + type settings. + + Args: + benchmark_name: Name of the benchmark to deploy + architecture: Architecture to deploy on (x64) + deployment_type: Deployment type (container) + + Returns: + An initialized OpenWhisk deployment client + + Raises: + AssertionError: If cloud_config is not set + """ deployment_name = "openwhisk" - assert cloud_config + assert cloud_config, "Cloud configuration is required" + # Create a copy of the config and set architecture and deployment type config_copy = cloud_config.copy() config_copy["experiments"]["architecture"] = architecture config_copy["experiments"]["container_deployment"] = deployment_type == "container" + # Create log file name based on test parameters f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log" deployment_client = self.client.get_deployment( config_copy, logging_filename=os.path.join(self.client.output_dir, f), ) + + # Synchronize resource initialization with a lock with OpenWhiskTestSequenceNodejs.lock: deployment_client.initialize(resource_prefix="regr") return deployment_client -# https://stackoverflow.com/questions/22484805/a-simple-working-example-for-testtools-concurrentstreamtestsuite +# Stream result handler for concurrent test execution +# Based on https://stackoverflow.com/questions/22484805/ +# a-simple-working-example-for-testtools-concurrentstreamtestsuite class TracingStreamResult(testtools.StreamResult): + """Stream result handler for concurrent test execution. + + This class captures test execution results and maintains running state + for all tests. It tracks successful tests, failed tests, and collects + test output for reporting. 
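+
+    For instance (illustrative), a test id ending in
+    "110.dynamic-html_x64_package" is reported as
+    "110.dynamic-html, x64, package".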
+ + Attributes: + all_correct: Whether all tests have passed + output: Dictionary mapping test IDs to their output bytes + success: Set of test names that succeeded + failures: Set of test names that failed + """ + all_correct: bool output: Dict[str, bytes] = {} def __init__(self): + """Initialize a new stream result handler. + + Sets up initial state for tracking test results. + """ self.all_correct = True self.success = set() self.failures = set() - # no way to directly access test instance from here def status(self, *args, **kwargs): + """Process a test status update. + + This method is called by the test runner to report on test progress + and results. It parses test IDs, collects output, and tracks success/failure. + + Args: + *args: Variable length argument list (not used) + **kwargs: Keyword arguments including test_id, test_status, and file_bytes + """ + # Update overall test status (only inprogress and success states are considered passing) self.all_correct = self.all_correct and (kwargs["test_status"] in ["inprogress", "success"]) + # Extract benchmark, architecture, and deployment type from test ID bench, arch, deployment_type = kwargs["test_id"].split("_")[-3:None] test_name = f"{bench}, {arch}, {deployment_type}" + if not kwargs["test_status"]: + # Collect test output test_id = kwargs["test_id"] if test_id not in self.output: self.output[test_id] = b"" self.output[test_id] += kwargs["file_bytes"] elif kwargs["test_status"] == "fail": + # Handle test failure print("\n-------------\n") print("{0[test_id]}: {0[test_status]}".format(kwargs)) print("{0[test_id]}: {1}".format(kwargs, self.output[kwargs["test_id"]].decode())) print("\n-------------\n") self.failures.add(test_name) elif kwargs["test_status"] == "success": + # Track successful tests self.success.add(test_name) @@ -415,19 +789,39 @@ def filter_out_benchmarks( language_version: str, architecture: str, ) -> bool: + """Filter out benchmarks that are not supported on specific platforms. + + Some benchmarks are not compatible with certain runtime environments due + to memory constraints, unsupported libraries, or other limitations. + This function identifies those incompatible combinations. + + Args: + benchmark: The benchmark name to check + deployment_name: Cloud provider name (aws, azure, gcp, openwhisk) + language: Runtime language (python, nodejs) + language_version: Language version (e.g., "3.9", "3.10") + architecture: CPU architecture (x64, arm64) + + Returns: + bool: True if the benchmark should be included, False to filter it out + """ # fmt: off + # Filter out image recognition on newer Python versions on AWS if (deployment_name == "aws" and language == "python" and language_version in ["3.9", "3.10", "3.11"]): return "411.image-recognition" not in benchmark + # Filter out image recognition on ARM architecture on AWS if (deployment_name == "aws" and architecture == "arm64"): return "411.image-recognition" not in benchmark + # Filter out image recognition on newer Python versions on GCP if (deployment_name == "gcp" and language == "python" and language_version in ["3.8", "3.9", "3.10", "3.11", "3.12"]): return "411.image-recognition" not in benchmark # fmt: on + # All other benchmarks are supported return True @@ -438,34 +832,72 @@ def regression_suite( deployment_config: dict, benchmark_name: Optional[str] = None, ): + """Create and run a regression test suite for specified cloud providers. 
+ + This function creates a test suite with all applicable test combinations for + the selected cloud providers and runtime configuration. It then runs the tests + concurrently and reports on successes and failures. + + Args: + sebs_client: The SeBS client instance + experiment_config: Configuration dictionary for the experiment + providers: Set of cloud provider names to test + deployment_config: Configuration dictionary for deployments + benchmark_name: Optional name of a specific benchmark to test + + Returns: + bool: True if any tests failed, False if all tests succeeded + + Raises: + AssertionError: If a requested provider is not in the deployment config + """ + # Create the test suite suite = unittest.TestSuite() + + # Make cloud_config available to test classes global cloud_config cloud_config = deployment_config + # Extract runtime configuration language = experiment_config["runtime"]["language"] language_version = experiment_config["runtime"]["version"] architecture = experiment_config["architecture"] + # Add AWS tests if requested if "aws" in providers: - assert "aws" in cloud_config["deployment"] + assert ( + "aws" in cloud_config["deployment"] + ), "AWS provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequenceNodejs)) + + # Add GCP tests if requested if "gcp" in providers: - assert "gcp" in cloud_config["deployment"] + assert ( + "gcp" in cloud_config["deployment"] + ), "GCP provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequenceNodejs)) + + # Add Azure tests if requested if "azure" in providers: - assert "azure" in cloud_config["deployment"] + assert ( + "azure" in cloud_config["deployment"] + ), "Azure provider requested but not in deployment config" if language == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequencePython)) elif language == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequenceNodejs)) + + # Add OpenWhisk tests if requested if "openwhisk" in providers: - assert "openwhisk" in cloud_config["deployment"] + assert ( + "openwhisk" in cloud_config["deployment"] + ), "OpenWhisk provider requested but not in deployment config" if language == "python": suite.addTest( unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequencePython) @@ -475,37 +907,44 @@ def regression_suite( unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequenceNodejs) ) + # Prepare the list of tests to run tests = [] - # mypy is confused here + # mypy is confused here about the type for case in suite: for test in case: # type: ignore - # skip + # Get the test method name test_name = cast(unittest.TestCase, test)._testMethodName - # Remove unsupported benchmarks + # Filter out unsupported benchmarks if not filter_out_benchmarks( test_name, test.deployment_name, # type: ignore - language, # type: ignore + language, language_version, architecture, # type: ignore ): print(f"Skip test {test_name} - not supported.") continue - # Use only a selected benchmark + # Filter by benchmark name if specified if not benchmark_name or (benchmark_name and benchmark_name in test_name): + # Set up test 
instance with client and config test.client = sebs_client # type: ignore test.experiment_config = experiment_config.copy() # type: ignore tests.append(test) else: print(f"Skip test {test_name}") + # Create a concurrent test suite for parallel execution concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in tests)) result = TracingStreamResult() + + # Run the tests result.startTestRun() concurrent_suite.run(result) result.stopTestRun() + + # Report results print(f"Succesfully executed {len(result.success)} out of {len(tests)} functions") for suc in result.success: print(f"- {suc}") @@ -514,9 +953,11 @@ def regression_suite( for failure in result.failures: print(f"- {failure}") + # Clean up resources if hasattr(AzureTestSequenceNodejs, "cli"): AzureTestSequenceNodejs.cli.shutdown() if hasattr(AzureTestSequencePython, "cli"): AzureTestSequencePython.cli.shutdown() + # Return True if any test failed return not result.all_correct diff --git a/sebs/sebs.py b/sebs/sebs.py index 309c0b25..993d652a 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -1,3 +1,16 @@ +"""Main SeBS (Serverless Benchmarking Suite) client implementation. + +This module provides the main interface for SeBS: +- Deployment client creation for different platforms (AWS, Azure, GCP, OpenWhisk, local) +- Benchmark execution and configuration +- Experiment setup and execution +- Storage access (object storage and NoSQL) +- Caching and Docker management +- Logging and output handling + +The SeBS client is the central point of interaction for both the CLI and programmatic use. +""" + import os from typing import Optional, Dict, Type @@ -20,31 +33,83 @@ class SeBS(LoggingBase): + """Main client for the Serverless Benchmarking Suite. + + Attributes: + cache_client: Client for managing cached artifacts (code packages, etc.) + docker_client: Docker client for container operations + output_dir: Directory for storing output files and logs + verbose: Whether to enable verbose logging + logging_filename: Default log file name + config: Global SeBS configuration + """ + @property def cache_client(self) -> Cache: + """Get the cache client. + + Returns: + Cache client for managing cached artifacts + """ return self._cache_client @property - def docker_client(self) -> docker.client: + def docker_client(self) -> docker.client.DockerClient: + """Get the Docker client. + + Returns: + Docker client for container operations + """ return self._docker_client @property def output_dir(self) -> str: + """Get the output directory. + + Returns: + Path to the output directory + """ return self._output_dir @property def verbose(self) -> bool: + """Get the verbose flag. + + Returns: + Whether verbose logging is enabled + """ return self._verbose @property def logging_filename(self) -> Optional[str]: + """Get the default logging filename. + + Returns: + Default logging filename or None if not set + """ return self._logging_filename @property def config(self) -> SeBSConfig: + """Get the global SeBS configuration. + + Returns: + Global configuration object + """ return self._config def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: + """Generate logging handlers for a specific file. + + This method creates or retrieves cached logging handlers for a given filename. + If no filename is provided, the default logging filename is used. 
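+
+        Example (illustrative, for a SeBS instance named sebs_client):
+
+            handlers = sebs_client.generate_logging_handlers("experiment.log")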
+ + Args: + logging_filename: Optional filename for logs, defaults to self.logging_filename + + Returns: + LoggingHandlers configured for the specified file + """ filename = logging_filename if logging_filename else self.logging_filename if filename in self._handlers: return self._handlers[filename] @@ -60,6 +125,21 @@ def __init__( verbose: bool = False, logging_filename: Optional[str] = None, ): + """Initialize the SeBS client. + + Creates a new SeBS client with the specified configuration. This sets up: + - Docker client + - Cache client + - Global configuration + - Logging handlers + - Output directory + + Args: + cache_dir: Directory for caching artifacts + output_dir: Directory for storing output files and logs + verbose: Whether to enable verbose logging (default: False) + logging_filename: Default log file name (default: None) + """ super().__init__() self._docker_client = docker.from_env() self._cache_client = Cache(cache_dir, self._docker_client) @@ -70,12 +150,16 @@ def __init__( self._handlers: Dict[Optional[str], LoggingHandlers] = {} self.logging_handlers = self.generate_logging_handlers() + # Create output directory if it doesn't exist os.makedirs(self.output_dir, exist_ok=True) def ignore_cache(self): - """ - The cache will only store code packages, - and won't update new functions and storage. + """Configure the cache to only store code packages. + + After calling this method, the cache will only store code packages + and won't update or use cached functions and storage. This is useful + when you want to ensure that functions are redeployed and storage + is recreated, but still want to reuse code packages. """ self._cache_client.ignore_storage = True self._cache_client.ignore_functions = True @@ -86,10 +170,33 @@ def get_deployment( logging_filename: Optional[str] = None, deployment_config: Optional[Config] = None, ) -> FaaSSystem: + """Get a deployment client for a specific cloud platform. + + This method creates and returns a deployment client for the specified + cloud platform. It validates that the requested platform and configuration + are supported, and initializes the client with the appropriate resources. + + The method dynamically imports the necessary modules for each platform + based on what's available in the environment, determined by has_platform(). + + Args: + config: Configuration dictionary with deployment and experiment settings + logging_filename: Optional filename for logs + deployment_config: Optional pre-configured deployment config + + Returns: + An initialized FaaS system deployment client + + Raises: + RuntimeError: If the requested deployment is not supported or if the + configuration is invalid (unsupported architecture, + deployment type, etc.) 
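+
+        Example (illustrative sketch; config is the full configuration dictionary
+        described above):
+
+            deployment = sebs_client.get_deployment(
+                config, logging_filename="regression_aws.log"
+            )
+            deployment.initialize(resource_prefix="regr")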
+ """ dep_config = config["deployment"] name = dep_config["name"] implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} + # Dynamically import platform-specific modules as needed if has_platform("aws"): from sebs.aws import AWS @@ -107,9 +214,11 @@ def get_deployment( implementations["openwhisk"] = OpenWhisk + # Validate deployment platform if name not in implementations: raise RuntimeError("Deployment {name} not supported!".format(name=name)) + # Validate architecture if config["experiments"]["architecture"] not in self._config.supported_architecture(name): raise RuntimeError( "{architecture} is not supported in {name}".format( @@ -117,21 +226,24 @@ def get_deployment( ) ) + # Validate deployment type - container if config["experiments"][ "container_deployment" ] and not self._config.supported_container_deployment(name): raise RuntimeError(f"Container deployment is not supported in {name}.") + # Validate deployment type - package if not config["experiments"][ "container_deployment" ] and not self._config.supported_package_deployment(name): raise RuntimeError(f"Code package deployment is not supported in {name}.") - # FIXME: future annotations, requires Python 3.7+ + # Set up logging and create deployment configuration handlers = self.generate_logging_handlers(logging_filename) if not deployment_config: deployment_config = Config.deserialize(dep_config, self.cache_client, handlers) + # Create and return the deployment client deployment_client = implementations[name]( self._config, deployment_config, # type: ignore @@ -146,15 +258,57 @@ def get_deployment_config( config: dict, logging_filename: Optional[str] = None, ) -> Config: + """Create a deployment configuration from a dictionary. + + This method deserializes a deployment configuration from a dictionary, + setting up logging handlers and connecting it to the cache client. + + Args: + config: Configuration dictionary + logging_filename: Optional filename for logs + + Returns: + A deserialized deployment configuration object + """ handlers = self.generate_logging_handlers(logging_filename) return Config.deserialize(config, self.cache_client, handlers) def get_experiment_config(self, config: dict) -> ExperimentConfig: + """Create an experiment configuration from a dictionary. + + This method deserializes an experiment configuration from a dictionary. + The experiment configuration contains settings specific to the + experiment being run, such as the number of iterations, timeout, etc. + + Args: + config: Configuration dictionary + + Returns: + A deserialized experiment configuration object + """ return ExperimentConfig.deserialize(config) def get_experiment( self, experiment_type: str, config: dict, logging_filename: Optional[str] = None ) -> Experiment: + """Get an experiment implementation for a specific experiment type. + + This method creates and returns an experiment implementation for the + specified experiment type. It validates that the requested experiment + type is supported and initializes the experiment with the appropriate + configuration. 
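+
+        Example (illustrative, assuming experiment_config holds the experiment
+        settings):
+
+            experiment = sebs_client.get_experiment("perf-cost", experiment_config)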
+ + Args: + experiment_type: Type of experiment to create (e.g., "perf-cost") + config: Configuration dictionary + logging_filename: Optional filename for logs + + Returns: + An initialized experiment implementation + + Raises: + RuntimeError: If the requested experiment type is not supported + """ from sebs.experiments import ( Experiment, PerfCost, @@ -163,14 +317,19 @@ def get_experiment( EvictionModel, ) + # Map of supported experiment types to their implementations implementations: Dict[str, Type[Experiment]] = { "perf-cost": PerfCost, "network-ping-pong": NetworkPingPong, "invocation-overhead": InvocationOverhead, "eviction-model": EvictionModel, } + + # Validate experiment type if experiment_type not in implementations: raise RuntimeError(f"Experiment {experiment_type} not supported!") + + # Create and configure the experiment experiment = implementations[experiment_type](self.get_experiment_config(config)) experiment.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename @@ -184,6 +343,22 @@ def get_benchmark( config: ExperimentConfig, logging_filename: Optional[str] = None, ) -> Benchmark: + """Get a benchmark implementation for a specific benchmark. + + This method creates and returns a benchmark implementation for the + specified benchmark name. It configures the benchmark with the + appropriate deployment, configuration, and resources. + + Args: + name: Name of the benchmark to create (e.g., "210.thumbnailer") + deployment: FaaS system deployment client + config: Experiment configuration + logging_filename: Optional filename for logs + + Returns: + An initialized benchmark implementation + """ + # Create and configure the benchmark benchmark = Benchmark( name, deployment.name(), @@ -193,6 +368,8 @@ def get_benchmark( self.cache_client, self.docker_client, ) + + # Set up logging benchmark.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename ) @@ -200,37 +377,117 @@ def get_benchmark( @staticmethod def get_storage_implementation(storage_type: types.Storage) -> Type[PersistentStorage]: + """Get a storage implementation for a specific storage type. + + This method returns the class for a persistent storage implementation + for the specified storage type. + + Args: + storage_type: Type of storage to get implementation for + + Returns: + Storage implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.Storage.MINIO: minio.Minio} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"Storage type {storage_type} not supported" return impl @staticmethod def get_nosql_implementation(storage_type: types.NoSQLStorage) -> Type[NoSQLStorage]: + """Get a NoSQL storage implementation for a specific storage type. + + This method returns the class for a NoSQL storage implementation + for the specified storage type. + + Args: + storage_type: Type of NoSQL storage to get implementation for + + Returns: + NoSQL storage implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.NoSQLStorage.SCYLLADB: scylladb.ScyllaDB} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"NoSQL storage type {storage_type} not supported" return impl @staticmethod def get_storage_config_implementation(storage_type: types.Storage): + """Get a storage configuration implementation for a specific storage type. 
+ + This method returns the class for a storage configuration implementation + for the specified storage type. + + Args: + storage_type: Type of storage to get configuration for + + Returns: + Storage configuration implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.Storage.MINIO: config.MinioConfig} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"Storage configuration for type {storage_type} not supported" return impl @staticmethod def get_nosql_config_implementation(storage_type: types.NoSQLStorage): + """Get a NoSQL configuration implementation for a specific storage type. + + This method returns the class for a NoSQL configuration implementation + for the specified storage type. + + Args: + storage_type: Type of NoSQL storage to get configuration for + + Returns: + NoSQL configuration implementation class + + Raises: + AssertionError: If the requested storage type is not supported + """ _storage_implementations = {types.NoSQLStorage.SCYLLADB: config.ScyllaDBConfig} impl = _storage_implementations.get(storage_type) - assert impl + assert impl, f"NoSQL configuration for type {storage_type} not supported" return impl def shutdown(self): + """Shutdown the SeBS client and release resources. + + This method shuts down the cache client and releases any resources + that need to be cleaned up when the client is no longer needed. + It is automatically called when using the client as a context manager. + """ self.cache_client.shutdown() def __enter__(self): + """Enter context manager. + + This method allows the SeBS client to be used as a context manager + using the 'with' statement, which ensures proper cleanup of resources. + + Returns: + The SeBS client instance + """ return self - def __exit__(self): + def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): + """Exit context manager. + + This method is called when exiting a 'with' block. It ensures that + resources are properly cleaned up by calling shutdown(). + + Args: + exc_type: Exception type if an exception occurred, None otherwise + exc_val: Exception value if an exception occurred, None otherwise + exc_tb: Exception traceback if an exception occurred, None otherwise + """ self.shutdown() diff --git a/sebs/statistics.py b/sebs/statistics.py index 8d00b855..01e3e385 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -1,3 +1,11 @@ +"""Statistical analysis utilities for benchmark experiments. + +This module provides functions for computing basic statistics and confidence +intervals on benchmark experiment results. It includes both parametric +(Student's t-distribution) and non-parametric (Le Boudec) methods for +computing confidence intervals. +""" + import math from typing import List, Tuple from collections import namedtuple @@ -5,31 +13,74 @@ import numpy as np import scipy.stats as st +# Named tuple for basic statistics results BasicStats = namedtuple("BasicStats", "mean median std cv") def basic_stats(times: List[float]) -> BasicStats: + """Compute basic statistics for a list of measurement times. + + This function computes the mean, median, standard deviation, and + coefficient of variation for a list of measurement times. 
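+
+    Example (illustrative):
+
+        basic_stats([1.0, 2.0, 3.0])
+        # -> BasicStats(mean=2.0, median=2.0, std~0.816, cv~40.8)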
+ + Args: + times: List of measurement times + + Returns: + A BasicStats named tuple with the computed statistics + """ mean = np.mean(times) median = np.median(times) std = np.std(times) - cv = std / mean * 100 + cv = std / mean * 100 # Coefficient of variation as percentage return BasicStats(mean, median, std, cv) def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: + """Compute parametric confidence interval using Student's t-distribution. + + This is a parametric method that assumes the data follows a normal distribution. + + Args: + alpha: Confidence level (e.g., 0.95 for 95% confidence) + times: List of measurement times + + Returns: + A tuple (lower, upper) representing the confidence interval + """ mean = np.mean(times) return st.t.interval(alpha, len(times) - 1, loc=mean, scale=st.sem(times)) def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: + """Compute non-parametric confidence interval using Le Boudec's method. + + It requires a sufficient number of samples but it is a non-parametric + method that does not assume that data follows the normal distribution. + + Reference: + J.-Y. Le Boudec, "Performance Evaluation of Computer and + Communication Systems", 2010. + + Args: + alpha: Confidence level (e.g., 0.95 for 95% confidence) + times: List of measurement times + + Returns: + A tuple (lower, upper) representing the confidence interval + Raises: + AssertionError: If an unsupported confidence level is provided + """ sorted_times = sorted(times) n = len(times) - # z(alfa/2) + # Z-values for common confidence levels + # z(alpha/2) for two-sided interval z_value = {0.95: 1.96, 0.99: 2.576}.get(alpha) - assert z_value + assert z_value, f"Unsupported confidence level: {alpha}" + # Calculate positions in the sorted array low_pos = math.floor((n - z_value * math.sqrt(n)) / 2) high_pos = math.ceil(1 + (n + z_value * math.sqrt(n)) / 2) diff --git a/sebs/storage/__init__.py b/sebs/storage/__init__.py index e69de29b..65dec561 100644 --- a/sebs/storage/__init__.py +++ b/sebs/storage/__init__.py @@ -0,0 +1,34 @@ +"""This module provides storage abstractions and implementations for SeBS, +supporting both object storage (S3-compatible) and NoSQL database storage. + +It includes: +- Configuration classes for different storage backends +- MinIO implementation for local S3-compatible object storage +- ScyllaDB implementation for local DynamoDB-compatible NoSQL storage +- Resource management classes for self-hosted storage deployments + +The storage module enables benchmarks to work with persistent data storage +across different deployment environments while maintaining consistent interfaces. +Thus, we can seamlessly port benchmarks between clouds and open-source +serverless platforms. 
+ +Key Components: + - config: Configuration dataclasses for storage backends + - minio: MinIO-based object storage implementation + - scylladb: ScyllaDB-based NoSQL storage implementation + - resources: Resource management for self-hosted storage deployments + +Example: + To use MinIO object storage in a benchmark: + + ```python + from sebs.storage.minio import Minio + from sebs.storage.config import MinioConfig + + # Configure and start MinIO + config = MinioConfig(mapped_port=9000, version="latest") + storage = Minio(docker_client, cache_client, resources, False) + storage.config = config + storage.start() + ``` +""" diff --git a/sebs/storage/config.py b/sebs/storage/config.py index cd47df39..e68262de 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -1,25 +1,71 @@ -from abc import ABC -from abc import abstractmethod -from typing import List +"""Configuration classes for storage backends in the Serverless Benchmarking Suite. +All configuration classes support serialization/deserialization for caching +and provide environment variable mappings for runtime configuration. +""" + +from abc import ABC, abstractmethod from dataclasses import dataclass, field +from typing import Any, Dict, List from sebs.cache import Cache @dataclass class PersistentStorageConfig(ABC): + """Abstract base class for persistent object storage configuration. + + This class defines the interface that all object storage configurations + must implement. It provides methods for serialization and environment + variable generation that are used for caching and runtime configuration. + + This is used by MinioStorage in different deployments. + + Subclasses must implement: + - serialize(): Convert configuration to dictionary for caching + - envs(): Generate environment variables for benchmark runtime + """ + @abstractmethod - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, Any]: Serialized configuration data suitable for JSON storage + """ pass @abstractmethod - def envs(self) -> dict: + def envs(self) -> Dict[str, str]: + """Generate environment variables for the storage configuration. + + Returns: + Dict[str, str]: Environment variables to be set in benchmark runtime + """ pass @dataclass class MinioConfig(PersistentStorageConfig): + """Configuration for MinIO object storage. + + MinIO provides a local S3-compatible object storage service that runs in + a Docker container. This configuration class stores all the necessary + parameters for deploying and connecting to a MinIO instance. 
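+
+    Example (illustrative values; real deployments auto-generate credentials and
+    detect the address):
+
+        cfg = MinioConfig(address="172.17.0.2:9000", mapped_port=9000, version="latest")
+        cfg.envs()["MINIO_ADDRESS"]  # "172.17.0.2:9000"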
+ + Attributes: + address: Network address where MinIO is accessible (auto-detected) + mapped_port: Host port mapped to MinIO's internal port 9000 + access_key: Access key for MinIO authentication (auto-generated) + secret_key: Secret key for MinIO authentication (auto-generated) + instance_id: Docker container ID of the running MinIO instance + output_buckets: List of bucket names used for benchmark output + input_buckets: List of bucket names used for benchmark input + version: MinIO Docker image version to use + data_volume: Host directory path for persistent data storage + type: Storage type identifier (always "minio") + """ + address: str = "" mapped_port: int = -1 access_key: str = "" @@ -31,16 +77,36 @@ class MinioConfig(PersistentStorageConfig): data_volume: str = "" type: str = "minio" - def update_cache(self, path: List[str], cache: Cache): + def update_cache(self, path: List[str], cache: Cache) -> None: + """Update the cache with this configuration's values. + + Stores all configuration fields in the cache using the specified path + as a prefix. This allows the configuration to be restored later from + the cache. + Args: + path: Cache key path prefix for this configuration + cache: Cache instance to store configuration in + """ for key in MinioConfig.__dataclass_fields__.keys(): if key == "resources": continue cache.update_config(val=getattr(self, key), keys=[*path, key]) - # self.resources.update_cache(cache) @staticmethod - def deserialize(data: dict) -> "MinioConfig": + def deserialize(data: Dict[str, Any]) -> "MinioConfig": + """Deserialize configuration from a dictionary. + + Creates a new MinioConfig instance from dictionary data, typically + loaded from cache or configuration files. Only known configuration + fields are used, unknown fields are ignored. + + Args: + data: Dictionary containing configuration data + + Returns: + MinioConfig: New configuration instance + """ keys = list(MinioConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} @@ -48,10 +114,23 @@ def deserialize(data: dict) -> "MinioConfig": return cfg - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, Any]: All configuration fields as a dictionary + """ return self.__dict__ - def envs(self) -> dict: + def envs(self) -> Dict[str, str]: + """Generate environment variables for MinIO configuration. + + Creates environment variables that can be used by benchmark functions + to connect to the MinIO storage instance. + + Returns: + Dict[str, str]: Environment variables for MinIO connection + """ return { "MINIO_ADDRESS": self.address, "MINIO_ACCESS_KEY": self.access_key, @@ -61,13 +140,51 @@ def envs(self) -> dict: @dataclass class NoSQLStorageConfig(ABC): + """Abstract base class for NoSQL database storage configuration. + + This class defines the interface that all NoSQL storage configurations + must implement. It provides serialization methods used for caching + and configuration management. + + This class will be overidden by specific implementations for different + FaaS systems. + + Subclasses must implement: + - serialize(): Convert configuration to dictionary for caching + """ + @abstractmethod - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """Serialize the configuration to a dictionary. 
+ + Returns: + Dict[str, Any]: Serialized configuration data suitable for JSON storage + """ pass @dataclass class ScyllaDBConfig(NoSQLStorageConfig): + """Configuration for ScyllaDB DynamoDB-compatible NoSQL storage. + + ScyllaDB provides a high-performance NoSQL database with DynamoDB-compatible + API through its Alternator interface. This configuration class stores all + the necessary parameters for deploying and connecting to a ScyllaDB instance. + + Attributes: + address: Network address where ScyllaDB is accessible (auto-detected) + mapped_port: Host port mapped to ScyllaDB's Alternator port + alternator_port: Internal port for DynamoDB-compatible API (default: 8000) + access_key: Access key for DynamoDB API (placeholder value) + secret_key: Secret key for DynamoDB API (placeholder value) + instance_id: Docker container ID of the running ScyllaDB instance + region: AWS region placeholder (not used for local deployment) + cpus: Number of CPU cores allocated to ScyllaDB container + memory: Memory allocation in MB for ScyllaDB container + version: ScyllaDB Docker image version to use + data_volume: Host directory path for persistent data storage + """ + address: str = "" mapped_port: int = -1 alternator_port: int = 8000 @@ -80,13 +197,34 @@ class ScyllaDBConfig(NoSQLStorageConfig): version: str = "" data_volume: str = "" - def update_cache(self, path: List[str], cache: Cache): + def update_cache(self, path: List[str], cache: Cache) -> None: + """Update the cache with this configuration's values. + + Stores all configuration fields in the cache using the specified path + as a prefix. This allows the configuration to be restored later from + the cache. + Args: + path: Cache key path prefix for this configuration + cache: Cache instance to store configuration in + """ for key in ScyllaDBConfig.__dataclass_fields__.keys(): cache.update_config(val=getattr(self, key), keys=[*path, key]) @staticmethod - def deserialize(data: dict) -> "ScyllaDBConfig": + def deserialize(data: Dict[str, Any]) -> "ScyllaDBConfig": + """Deserialize configuration from a dictionary. + + Creates a new ScyllaDBConfig instance from dictionary data, typically + loaded from cache or configuration files. Only known configuration + fields are used, unknown fields are ignored. + + Args: + data: Dictionary containing configuration data + + Returns: + ScyllaDBConfig: New configuration instance + """ keys = list(ScyllaDBConfig.__dataclass_fields__.keys()) data = {k: v for k, v in data.items() if k in keys} @@ -94,5 +232,10 @@ def deserialize(data: dict) -> "ScyllaDBConfig": return cfg - def serialize(self) -> dict: + def serialize(self) -> Dict[str, Any]: + """Serialize the configuration to a dictionary. + + Returns: + Dict[str, Any]: All configuration fields as a dictionary + """ return self.__dict__ diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index bb9112a2..0ceb0052 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -1,9 +1,17 @@ +""" +Module for MinIO S3-compatible storage in the Serverless Benchmarking Suite. + +MinIO runs in a Docker container and provides persistent +storage for benchmark data and results. It is primarily used for local +testing and on cloud platforms with no object storage, e.g., OpenWhisk. 
+""" + import copy import json import os import secrets import uuid -from typing import List, Optional, Type, TypeVar +from typing import Any, Dict, List, Optional, Type, TypeVar import docker import minio @@ -17,42 +25,91 @@ class Minio(PersistentStorage): + """ + This class manages a self-hosted MinIO storage instance running + in a Docker container. It handles bucket creation, file uploads/downloads, + and container lifecycle management. + + Attributes: + config: MinIO configuration settings + connection: MinIO client connection + """ + @staticmethod def typename() -> str: + """ + Get the qualified type name of this class. + + Returns: + str: Full type name including deployment name + """ return f"{Minio.deployment_name()}.Minio" @staticmethod def deployment_name() -> str: + """ + Get the deployment platform name. + + Returns: + str: Deployment name ('minio') + """ return "minio" - # the location does not matter + # The region setting is required by S3 API but not used for local MinIO MINIO_REGION = "us-east-1" def __init__( self, - docker_client: docker.client, + docker_client: docker.DockerClient, cache_client: Cache, resources: Resources, replace_existing: bool, ): + """ + Initialize a MinIO storage instance. + + Args: + docker_client: Docker client for managing the MinIO container + cache_client: Cache client for storing storage configuration + resources: Resources configuration + replace_existing: Whether to replace existing buckets + """ super().__init__(self.MINIO_REGION, cache_client, resources, replace_existing) - self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None + self._docker_client: docker.DockerClient = docker_client + self._storage_container: Optional[docker.models.containers.Container] = None self._cfg = MinioConfig() @property def config(self) -> MinioConfig: + """ + Get the MinIO configuration. + + Returns: + MinioConfig: The configuration object + """ return self._cfg @config.setter def config(self, config: MinioConfig): + """ + Set the MinIO configuration. + + Args: + config: New configuration object + """ self._cfg = config @staticmethod - def _define_http_client(): + def _define_http_client() -> Any: """ - Minio does not allow another way of configuring timeout for connection. - The rest of configuration is copied from source code of Minio. + Configure HTTP client for MinIO with appropriate timeouts and retries. + + MinIO does not provide a direct way to configure connection timeouts, so + we need to create a custom HTTP client with proper timeout settings. + The rest of configuration follows MinIO's default client settings. + + Returns: + urllib3.PoolManager: Configured HTTP client for MinIO """ import urllib3 from datetime import timedelta @@ -67,14 +124,26 @@ def _define_http_client(): ), ) - def start(self): + def start(self) -> None: + """ + Start a MinIO storage container. + + Creates and runs a Docker container with MinIO, configuring it with + random credentials and mounting a volume for persistent storage. + The container runs in detached mode and is accessible via the + configured port. 
+
+        Raises:
+            RuntimeError: If starting the MinIO container fails
+        """
+        # Set up data volume location
         if self._cfg.data_volume == "":
             minio_volume = os.path.join(project_absolute_path(), "minio-volume")
         else:
             minio_volume = self._cfg.data_volume
         minio_volume = os.path.abspath(minio_volume)
+        # Create volume directory if it doesn't exist
         os.makedirs(minio_volume, exist_ok=True)
         volumes = {
             minio_volume: {
@@ -83,13 +152,16 @@ def start(self):
             }
         }

+        # Generate random credentials for security
         self._cfg.access_key = secrets.token_urlsafe(32)
         self._cfg.secret_key = secrets.token_hex(32)
         self._cfg.address = ""
         self.logging.info("Minio storage ACCESS_KEY={}".format(self._cfg.access_key))
         self.logging.info("Minio storage SECRET_KEY={}".format(self._cfg.secret_key))
+
         try:
             self.logging.info(f"Starting storage Minio on port {self._cfg.mapped_port}")
+            # Run the MinIO container
             self._storage_container = self._docker_client.containers.run(
                 f"minio/minio:{self._cfg.version}",
                 command="server /data",
@@ -114,68 +186,127 @@ def start(self):
             self.logging.error("Starting Minio storage failed! Unknown error: {}".format(e))
             raise RuntimeError("Starting Minio storage unsuccesful")

-    def configure_connection(self):
-        # who knows why? otherwise attributes are not loaded
-        if self._cfg.address == "":
+    def configure_connection(self) -> None:
+        """
+        Configure the connection to the MinIO container.
+
+        Determines the appropriate address to connect to the MinIO container
+        based on the host platform. For Linux, it uses the container's
+        bridge IP address, while for Windows, macOS, or WSL it uses
+        localhost with the mapped port.
+
+        Raises:
+            RuntimeError: If the MinIO container is not available or if the IP address
+                cannot be detected
+        """
+        # Only configure if the address is not already set
+        if self._cfg.address == "":
+            # Verify container existence
            if self._storage_container is None:
                raise RuntimeError(
                    "Minio container is not available! Make sure that you deployed "
                    "the Minio storage and provided configuration!"
                )

+            # Reload to ensure we have the latest container attributes
            self._storage_container.reload()

-            # Check if the system is Linux and that it's not WSL
+            # Platform-specific address configuration
            if is_linux():
+                # On native Linux, use the container's bridge network IP
                networks = self._storage_container.attrs["NetworkSettings"]["Networks"]
                self._cfg.address = "{IPAddress}:{Port}".format(
                    IPAddress=networks["bridge"]["IPAddress"], Port=9000
                )
            else:
-                # System is either WSL, Windows, or Mac
+                # On Windows, macOS, or WSL, use localhost with the mapped port
                self._cfg.address = f"localhost:{self._cfg.mapped_port}"

+            # Verify address was successfully determined
            if not self._cfg.address:
                self.logging.error(
                    f"Couldn't read the IP address of container from attributes "
-                    f"{json.dumps(self._instance.attrs, indent=2)}"
+                    f"{json.dumps(self._storage_container.attrs, indent=2)}"
                )
                raise RuntimeError(
-                    f"Incorrect detection of IP address for container with id {self._instance_id}"
+                    f"Incorrect detection of IP address for container with id "
+                    f"{self._cfg.instance_id}"
                )
            self.logging.info("Starting minio instance at {}".format(self._cfg.address))
+
+        # Create the connection using the configured address
        self.connection = self.get_connection()

-    def stop(self):
+    def stop(self) -> None:
+        """
+        Stop the MinIO container.
+
+        Gracefully stops the running MinIO container if it exists.
+        Logs an error if the container is not known.
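For orientation, here is a stripped-down version of the container start sequence described above, using docker-py directly. The image tag, port mapping, and environment variable names are placeholders rather than the exact values SeBS passes.

```python
import os
import secrets
import docker

docker_client = docker.from_env()

# Random credentials, as in Minio.start()
access_key = secrets.token_urlsafe(32)
secret_key = secrets.token_hex(32)

# Host directory mounted as the MinIO data volume
volume_dir = os.path.abspath("minio-volume")
os.makedirs(volume_dir, exist_ok=True)

container = docker_client.containers.run(
    "minio/minio:latest",
    command="server /data",
    environment={"MINIO_ACCESS_KEY": access_key, "MINIO_SECRET_KEY": secret_key},
    ports={"9000/tcp": 9011},   # expose MinIO's internal port 9000 on host port 9011
    volumes={volume_dir: {"bind": "/data", "mode": "rw"}},
    detach=True,
)
print(container.id)
```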
+ """ if self._storage_container is not None: self.logging.info(f"Stopping minio container at {self._cfg.address}.") self._storage_container.stop() self.logging.info(f"Stopped minio container at {self._cfg.address}.") else: - self.logging.error("Stopping minio was not succesful, storage container not known!") + self.logging.error("Stopping minio was not successful, storage container not known!") - def get_connection(self): + def get_connection(self) -> minio.Minio: + """ + Create a new MinIO client connection. + + Creates a connection to the MinIO server using the configured address, + credentials, and HTTP client settings. + + Returns: + minio.Minio: Configured MinIO client + """ return minio.Minio( self._cfg.address, access_key=self._cfg.access_key, secret_key=self._cfg.secret_key, - secure=False, + secure=False, # Local MinIO doesn't use HTTPS http_client=Minio._define_http_client(), ) - def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False): + def _create_bucket( + self, name: str, buckets: Optional[List[str]] = None, randomize_name: bool = False + ) -> str: + """ + Create a new bucket if it doesn't already exist. + + Checks if a bucket with the given name already exists in the list of buckets. + If not, creates a new bucket with either the exact name or a randomized name. + + Args: + name: Base name for the bucket + buckets: List of existing bucket names to check against + randomize_name: Whether to append a random UUID to the bucket name + + Returns: + str: Name of the existing or newly created bucket + + Raises: + minio.error.ResponseError: If bucket creation fails + """ + + if buckets is None: + buckets = [] + + # Check if bucket already exists for bucket_name in buckets: if name in bucket_name: self.logging.info( "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name - # minio has limit of bucket name to 16 characters + + # MinIO has limit of bucket name to 16 characters if randomize_name: bucket_name = "{}-{}".format(name, str(uuid.uuid4())[0:16]) else: bucket_name = name + try: self.connection.make_bucket(bucket_name, location=self.MINIO_REGION) self.logging.info("Created bucket {}".format(bucket_name)) @@ -186,55 +317,145 @@ def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: boo minio.error.ResponseError, ) as err: self.logging.error("Bucket creation failed!") - # rethrow + # Rethrow the error for handling by the caller raise err - def uploader_func(self, path_idx, file, filepath): + def uploader_func(self, path_idx: int, file: str, filepath: str) -> None: + """ + Upload a file to the MinIO storage. + + Uploads a file to the specified input prefix in the benchmarks bucket. + This function is passed to benchmarks for uploading their input data. 
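A minimal standalone sketch of the bucket-creation pattern used by `_create_bucket()`: reuse an existing bucket when one matches, otherwise create a new one, optionally with a random suffix. Endpoint and credentials are placeholders.

```python
import uuid
import minio

client = minio.Minio("localhost:9011", access_key="demo-key",
                     secret_key="demo-secret", secure=False)

def create_bucket(name: str, existing: list, randomize_name: bool = False) -> str:
    # Reuse a bucket whose name already contains the requested name
    for bucket_name in existing:
        if name in bucket_name:
            return bucket_name
    # Otherwise create it, optionally with a random suffix to avoid collisions
    bucket_name = f"{name}-{str(uuid.uuid4())[0:16]}" if randomize_name else name
    client.make_bucket(bucket_name, location="us-east-1")
    return bucket_name

bucket = create_bucket("sebs-benchmarks", existing=[], randomize_name=True)
# client.fput_object(bucket, "input/data.csv", "local/data.csv")  # upload pattern used by uploader_func()
```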
+
+        Args:
+            path_idx: Index of the input prefix to use
+            file: Name of the file within the bucket
+            filepath: Local path to the file to upload
+
+        Raises:
+            minio.error.ResponseError: If the upload fails
+        """
         try:
             key = os.path.join(self.input_prefixes[path_idx], file)
             bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS)
             self.connection.fput_object(bucket_name, key, filepath)
         except minio.error.ResponseError as err:
             self.logging.error("Upload failed!")
-            raise (err)
-
-    def clean(self):
-        for bucket in self.output_buckets:
-            objects = self.connection.list_objects_v2(bucket)
-            objects = [obj.object_name for obj in objects]
-            for err in self.connection.remove_objects(bucket, objects):
-                self.logging.error("Deletion Error: {}".format(err))
-
-    def download_results(self, result_dir):
-        result_dir = os.path.join(result_dir, "storage_output")
-        for bucket in self.output_buckets:
-            objects = self.connection.list_objects_v2(bucket)
-            objects = [obj.object_name for obj in objects]
-            for obj in objects:
-                self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj))
-
-    def clean_bucket(self, bucket: str):
+            raise err
+
+    # FIXME: is this still used anywhere?
+    # def clean(self) -> None:
+    #     """
+    #     Clean all objects from output buckets.
+    #
+    #     Removes all objects from the output buckets to prepare for a new
+    #     benchmark run. Logs any errors that occur during deletion.
+    #     """
+    #     for bucket in self.output_buckets:
+    #         objects = self.connection.list_objects_v2(bucket)
+    #         objects = [obj.object_name for obj in objects]
+    #         for err in self.connection.remove_objects(bucket, objects):
+    #             self.logging.error("Deletion Error: {}".format(err))
+    #
+    # def download_results(self, result_dir: str) -> None:
+    #     """
+    #     Download all objects from output buckets to a local directory.
+    #
+    #     Downloads benchmark results from all output buckets to a subdirectory
+    #     named 'storage_output' within the specified result directory.
+    #
+    #     Args:
+    #         result_dir: Base directory to store downloaded results
+    #     """
+    #     result_dir = os.path.join(result_dir, "storage_output")
+    #     for bucket in self.output_buckets:
+    #         objects = self.connection.list_objects_v2(bucket)
+    #         objects = [obj.object_name for obj in objects]
+    #         for obj in objects:
+    #             self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj))
+
+    def clean_bucket(self, bucket_name: str) -> None:
+        """
+        Remove all objects from a bucket.
+
+        Deletes all objects within the specified bucket but keeps the bucket itself.
+        Logs any errors that occur during object deletion.
+
+        Args:
+            bucket_name: Name of the bucket to clean
+        """
         delete_object_list = map(
             lambda x: minio.DeleteObject(x.object_name),
-            self.connection.list_objects(bucket_name=bucket),
+            self.connection.list_objects(bucket_name=bucket_name),
         )
-        errors = self.connection.remove_objects(bucket, delete_object_list)
+        errors = self.connection.remove_objects(bucket_name, delete_object_list)
         for error in errors:
-            self.logging.error(f"Error when deleting object from bucket {bucket}: {error}!")
+            self.logging.error(f"Error when deleting object from bucket {bucket_name}: {error}!")
+
+    def remove_bucket(self, bucket: str) -> None:
+        """
+        Delete a bucket completely.
+
+        Removes the specified bucket from the MinIO storage.
+        The bucket must be empty before it can be deleted.
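The `clean_bucket()` pattern above can be reproduced with a raw MinIO client as follows. Recent minio-py releases expose `DeleteObject` under `minio.deleteobjects`; connection parameters are placeholders.

```python
import minio
from minio.deleteobjects import DeleteObject

client = minio.Minio("localhost:9011", access_key="demo-key",
                     secret_key="demo-secret", secure=False)

bucket_name = "sebs-output"
to_delete = (DeleteObject(obj.object_name)
             for obj in client.list_objects(bucket_name))
for error in client.remove_objects(bucket_name, to_delete):
    print(f"Error when deleting object from bucket {bucket_name}: {error}!")
```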
- def remove_bucket(self, bucket: str): + Args: + bucket: Name of the bucket to remove + """ self.connection.remove_bucket(Bucket=bucket) def correct_name(self, name: str) -> str: + """ + Format a bucket name to comply with MinIO naming requirements. + + For MinIO, no name correction is needed (unlike some cloud providers + that enforce additional restrictions). + + Args: + name: Original bucket name + + Returns: + str: Bucket name (unchanged for MinIO) + """ return name - def download(self, bucket_name: str, key: str, filepath: str): + def download(self, bucket_name: str, key: str, filepath: str) -> None: + """ + Download an object from a bucket to a local file. + + Not implemented for this class. Use fget_object directly or other methods. + + Raises: + NotImplementedError: This method is not implemented + """ raise NotImplementedError() def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a bucket exists. + + Args: + bucket_name: Name of the bucket to check + + Returns: + bool: True if the bucket exists, False otherwise + """ return self.connection.bucket_exists(bucket_name) def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List all objects in a bucket with an optional prefix filter. + + Args: + bucket_name: Name of the bucket to list + prefix: Optional prefix to filter objects + + Returns: + List[str]: List of object names in the bucket + + Raises: + RuntimeError: If the bucket does not exist + """ try: objects_list = self.connection.list_objects(bucket_name) return [obj.object_name for obj in objects_list if prefix in obj.object_name] @@ -242,25 +463,40 @@ def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: raise RuntimeError(f"Attempting to access a non-existing bucket {bucket_name}!") def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all buckets, optionally filtered by name. + + Args: + bucket_name: Optional filter for bucket names + + Returns: + List[str]: List of bucket names + """ buckets = self.connection.list_buckets() if bucket_name is not None: return [bucket.name for bucket in buckets if bucket_name in bucket.name] else: return [bucket.name for bucket in buckets] - def upload(self, bucket_name: str, filepath: str, key: str): - raise NotImplementedError() + def upload(self, bucket_name: str, filepath: str, key: str) -> None: + """ + Upload a file to a bucket. - def serialize(self) -> dict: - return self._cfg.serialize() + Not implemented for this class. Use fput_object directly or uploader_func. - """ - This implementation supports overriding this class. - The main Minio class is used to start/stop deployments. + Raises: + NotImplementedError: This method is not implemented + """ + raise NotImplementedError() - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. - """ + def serialize(self) -> Dict[str, Any]: + """ + Serialize MinIO configuration to a dictionary. + + Returns: + dict: Serialized configuration data + """ + return self._cfg.serialize() T = TypeVar("T", bound="Minio") @@ -271,9 +507,35 @@ def _deserialize( resources: Resources, obj_type: Type[T], ) -> T: + """ + Deserialize a MinIO instance from cached configuration with custom type. + + Creates a new instance of the specified class type from cached configuration + data. This allows platform-specific versions to be deserialized correctly + while sharing the core implementation. 
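A short usage sketch of the query helpers documented above (`exists_bucket`, `list_bucket`, `list_buckets`) expressed with a plain MinIO client; bucket names and credentials are placeholders.

```python
import minio

client = minio.Minio("localhost:9011", access_key="demo-key",
                     secret_key="demo-secret", secure=False)

if client.bucket_exists("sebs-benchmarks"):
    # substring filtering mirrors list_bucket() above
    names = [obj.object_name for obj in client.list_objects("sebs-benchmarks")
             if "input" in obj.object_name]
    print(names)

# bucket-name filtering mirrors list_buckets()
print([bucket.name for bucket in client.list_buckets() if "sebs" in bucket.name])
```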
When overriding the implementation in + Local/OpenWhisk/..., we call the _deserialize method and provide an + alternative implementation type. + + FIXME: is this still needed? It looks like we stopped using + platform-specific implementations. + + Args: + cached_config: Cached MinIO configuration + cache_client: Cache client + resources: Resources configuration + obj_type: Type of object to create (a Minio subclass) + + Returns: + T: Deserialized instance of the specified type + + Raises: + RuntimeError: If the storage container does not exist + """ docker_client = docker.from_env() obj = obj_type(docker_client, cache_client, resources, False) obj._cfg = cached_config + + # Try to reconnect to existing container if ID is available if cached_config.instance_id: instance_id = cached_config.instance_id try: @@ -282,11 +544,28 @@ def _deserialize( raise RuntimeError(f"Storage container {instance_id} does not exist!") else: obj._storage_container = None + + # Copy bucket information obj._input_prefixes = copy.copy(cached_config.input_buckets) obj._output_prefixes = copy.copy(cached_config.output_buckets) + + # Set up connection obj.configure_connection() return obj @staticmethod def deserialize(cached_config: MinioConfig, cache_client: Cache, res: Resources) -> "Minio": + """ + Deserialize a MinIO instance from cached configuration. + + Creates a new Minio instance from cached configuration data. + + Args: + cached_config: Cached MinIO configuration + cache_client: Cache client + res: Resources configuration + + Returns: + Minio: Deserialized Minio instance + """ return Minio._deserialize(cached_config, cache_client, res, Minio) diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index a85e725e..866cc5cc 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -1,4 +1,15 @@ -from typing import cast, Optional, Tuple +"""Resource management for self-hosted storage deployments in SeBS. + +Its main responsibility is providing consistent interface and cache +behavior of self-hosted storage for the entire SeBS system. + +Key Classes: + SelfHostedResources: Configuration management for self-hosted storage resources + SelfHostedSystemResources: System-level resource management and service provisioning +""" + +import docker +from typing import cast, Dict, Optional, Tuple, Any from sebs.cache import Cache from sebs.faas.config import Config, Resources @@ -15,30 +26,57 @@ ) from sebs.utils import LoggingHandlers -import docker - class SelfHostedResources(Resources): + """Resource configuration for self-hosted storage deployments. + + Attributes: + _object_storage: Configuration for object storage (MinIO) + _nosql_storage: Configuration for NoSQL storage (ScyllaDB) + """ + def __init__( self, name: str, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): + """Initialize self-hosted resources configuration. + + Args: + name: Name of the deployment/resource group + storage_cfg: Configuration for object storage service + nosql_storage_cfg: Configuration for NoSQL storage service + """ super().__init__(name=name) self._object_storage = storage_cfg self._nosql_storage = nosql_storage_cfg @property def storage_config(self) -> Optional[PersistentStorageConfig]: + """Get the object storage configuration. 
+ + Returns: + Optional[PersistentStorageConfig]: Object storage configuration or None + """ return self._object_storage @property def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: + """Get the NoSQL storage configuration. + + Returns: + Optional[NoSQLStorageConfig]: NoSQL storage configuration or None + """ return self._nosql_storage - def serialize(self) -> dict: - out: dict = {} + def serialize(self) -> Dict[str, Any]: + """Serialize the resource configuration to a dictionary. + + Returns: + Dict[str, Any]: Serialized configuration containing storage and/or nosql sections + """ + out: Dict[str, Any] = {} if self._object_storage is not None: out = {**out, "storage": self._object_storage.serialize()} @@ -48,7 +86,15 @@ def serialize(self) -> dict: return out - def update_cache(self, cache: Cache): + def update_cache(self, cache: Cache) -> None: + """Update the configuration cache with current resource settings. + + Stores both object storage and NoSQL storage configurations in the + cache for later retrieval. + + Args: + cache: Cache instance to store configurations in + """ super().update_cache(cache) if self._object_storage is not None: cast(MinioConfig, self._object_storage).update_cache( @@ -60,10 +106,23 @@ def update_cache(self, cache: Cache): ) def _deserialize_storage( - self, config: dict, cached_config: Optional[dict], storage_type: str - ) -> Tuple[str, dict]: + self, config: Dict[str, Any], cached_config: Optional[Dict[str, Any]], storage_type: str + ) -> Tuple[str, Dict[str, Any]]: + """Deserialize storage configuration from config or cache. + + Attempts to load storage configuration from the provided config first, + then falls back to cached configuration if available. + + Args: + config: Current configuration dictionary + cached_config: Previously cached configuration dictionary + storage_type: Type of storage to deserialize ('object' or 'nosql') + + Returns: + Tuple[str, Dict[str, Any]]: Storage implementation name and configuration + """ storage_impl = "" - storage_config = {} + storage_config: Dict[str, Any] = {} # Check for new config if "storage" in config and storage_type in config["storage"]: @@ -91,7 +150,19 @@ def _deserialize_storage( return storage_impl, storage_config @staticmethod - def _deserialize(ret: "SelfHostedResources", config: dict, cached_config: dict): + def _deserialize( + ret: "SelfHostedResources", config: Dict[str, Any], cached_config: Optional[Dict[str, Any]] + ) -> None: + """Deserialize storage configurations from config and cache data. + + Populates the SelfHostedResources instance with storage configurations + loaded from the provided configuration and cached data. + + Args: + ret: SelfHostedResources instance to populate + config: Current configuration dictionary + cached_config: Previously cached configuration dictionary + """ obj_storage_impl, obj_storage_cfg = ret._deserialize_storage( config, cached_config, "object" ) @@ -118,6 +189,15 @@ def _deserialize(ret: "SelfHostedResources", config: dict, cached_config: dict): class SelfHostedSystemResources(SystemResources): + """System-level resource management for self-hosted storage deployments. 
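To illustrate the serialized shape produced by `SelfHostedResources.serialize()`, a hedged sketch follows. It assumes the classes can be constructed directly with the signatures shown in this patch and that the remaining dataclass fields have defaults; it is not taken from the SeBS test suite.

```python
from sebs.storage.config import MinioConfig, ScyllaDBConfig
from sebs.storage.resources import SelfHostedResources

res = SelfHostedResources(
    name="local",
    storage_cfg=MinioConfig(address="localhost:9011"),
    nosql_storage_cfg=ScyllaDBConfig(address="localhost:8000"),
)

# Produces a dict with a "storage" section and a "nosql" section,
# each containing the corresponding configuration fields.
print(res.serialize())
```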
+ + Attributes: + _name: Name of the deployment + _logging_handlers: Logging configuration handlers + _storage: Active persistent storage instance (MinIO) + _nosql_storage: Active NoSQL storage instance (ScyllaDB) + """ + def __init__( self, name: str, @@ -126,6 +206,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """Initialize system resources for self-hosted storage. + + Args: + name: Name of the deployment + config: SeBS configuration object + cache_client: Cache client for configuration persistence + docker_client: Docker client for container management + logger_handlers: Logging configuration handlers + """ super().__init__(config, cache_client, docker_client) self._name = name @@ -133,17 +222,22 @@ def __init__( self._storage: Optional[PersistentStorage] = None self._nosql_storage: Optional[NoSQLStorage] = None - """ - Create wrapper object for minio storage and fill buckets. - Starts minio as a Docker instance, using always fresh buckets. + def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """Get or create a persistent storage instance. - :param benchmark: - :param buckets: number of input and output buckets - :param replace_existing: not used. - :return: Azure storage instance - """ + Creates a MinIO storage instance if one doesn't exist, or returns the + existing instance. The storage is deserialized from a serialized + config of an existing storage deployment. - def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + Args: + replace_existing: Whether to replace existing buckets (optional) + + Returns: + PersistentStorage: MinIO storage instance + + Raises: + RuntimeError: If storage configuration is missing or unsupported + """ if self._storage is None: storage_config = cast(SelfHostedResources, self._config.resources).storage_config if storage_config is None: @@ -172,6 +266,19 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """Get or create a NoSQL storage instance. + + Creates a ScyllaDB storage instance if one doesn't exist, or returns the + existing instance. The storage is deserialized from a serialized + config of an existing storage deployment. + + + Returns: + NoSQLStorage: ScyllaDB storage instance + + Raises: + RuntimeError: If NoSQL storage configuration is missing or unsupported + """ if self._nosql_storage is None: storage_config = cast(SelfHostedResources, self._config.resources).nosql_storage_config if storage_config is None: diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index aae97815..7e311350 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -1,9 +1,21 @@ +"""ScyllaDB NoSQL storage implementation for the Serverless Benchmarking Suite. + +This module implements NoSQL database storage using ScyllaDB, which provides a +DynamoDB-compatible API through its Alternator interface. ScyllaDB runs in a +Docker container, and the implementation uses boto3 while running locally +for development and testing purposes. 
+""" + import json import os import platform import time from collections import defaultdict -from typing import Dict, Optional, Tuple, Type, TypeVar +from typing import Any, Dict, Optional, Tuple, Type, TypeVar + +import boto3 +from boto3.dynamodb.types import TypeSerializer +import docker from sebs.cache import Cache from sebs.faas.config import Resources @@ -12,38 +24,75 @@ from sebs.storage.config import ScyllaDBConfig from sebs.utils import project_absolute_path -import boto3 -from boto3.dynamodb.types import TypeSerializer -import docker - class ScyllaDB(NoSQLStorage): + """ScyllaDB implementation for DynamoDB-compatible NoSQL storage. + + This class manages a ScyllaDB instance running in a Docker container, + providing DynamoDB-compatible NoSQL storage through ScyllaDB's Alternator + interface. It handles table creation, data operations, and container + lifecycle management. + + Attributes: + _docker_client: Docker client for container management + _storage_container: Docker container running ScyllaDB + _cfg: ScyllaDB configuration settings + _tables: Mapping of benchmark names to table mappings + _serializer: DynamoDB type serializer for data conversion + client: Boto3 DynamoDB client configured for ScyllaDB + """ + @staticmethod def typename() -> str: + """Get the qualified type name of this class. + + Returns: + str: Full type name including deployment name + """ return f"{ScyllaDB.deployment_name()}.ScyllaDB" @staticmethod def deployment_name() -> str: + """Get the deployment platform name. + + Returns: + str: Deployment name ('scylladb') + """ return "scylladb" @property def config(self) -> ScyllaDBConfig: + """Get the ScyllaDB configuration. + + Returns: + ScyllaDBConfig: The configuration object + """ return self._cfg - # the location does not matter + # The region setting is required by DynamoDB API but not used for local ScyllaDB SCYLLADB_REGION = "None" def __init__( self, - docker_client: docker.client, + docker_client: docker.DockerClient, cache_client: Cache, config: ScyllaDBConfig, resources: Optional[Resources] = None, ): + """Initialize a ScyllaDB storage instance. + It will initialize a boto3 client if the ScyllaDB + address is provided in the configuration. + + Args: + docker_client: Docker client for managing the ScyllaDB container + cache_client: Cache client for storing storage configuration + config: ScyllaDB configuration settings + resources: Resources configuration (optional) + """ super().__init__(self.SCYLLADB_REGION, cache_client, resources) # type: ignore self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None + self._storage_container: Optional[docker.models.containers.Container] = None self._cfg = config # Map benchmark -> orig_name -> table_name @@ -59,8 +108,20 @@ def __init__( endpoint_url=f"http://{config.address}", ) - def start(self): + def start(self) -> None: + """Start a ScyllaDB storage container. + + Creates and runs a Docker container with ScyllaDB, configuring it with + the specified CPU and memory resources. The container runs in detached + mode and exposes the Alternator DynamoDB-compatible API on the configured port. + The method waits for ScyllaDB to fully initialize by checking the nodetool + status until the service is ready. 
+ + Raises: + RuntimeError: If starting the ScyllaDB container fails or if ScyllaDB + fails to initialize within the timeout period + """ if self._cfg.data_volume == "": scylladb_volume = os.path.join(project_absolute_path(), "scylladb-volume") else: @@ -76,7 +137,6 @@ def start(self): } try: - scylladb_args = "" scylladb_args += f"--smp {self._cfg.cpus} " scylladb_args += f"--memory {self._cfg.memory}M " @@ -104,34 +164,43 @@ def start(self): attempts = 0 max_attempts = 30 while attempts < max_attempts: - exit_code, out = self._storage_container.exec_run("nodetool status") if exit_code == 0: - self.logging.info("Started ScyllaDB succesfully!") + self.logging.info("Started ScyllaDB successfully!") break time.sleep(1.0) attempts += 1 if attempts == max_attempts: - self.logging.error("Failed to launch ScyllaBD!") + self.logging.error("Failed to launch ScyllaDB!") self.logging.error(f"Last result of nodetool status: {out}") - raise RuntimeError("Failed to launch ScyllaBD!") + raise RuntimeError("Failed to launch ScyllaDB!") self.configure_connection() except docker.errors.APIError as e: self.logging.error("Starting ScyllaDB storage failed! Reason: {}".format(e)) - raise RuntimeError("Starting ScyllaDB storage unsuccesful") + raise RuntimeError("Starting ScyllaDB storage unsuccessful") except Exception as e: self.logging.error("Starting ScyllaDB storage failed! Unknown error: {}".format(e)) - raise RuntimeError("Starting ScyllaDB storage unsuccesful") + raise RuntimeError("Starting ScyllaDB storage unsuccessful") - # FIXME: refactor this - duplicated code from minio - def configure_connection(self): - # who knows why? otherwise attributes are not loaded - if self._cfg.address == "": + def configure_connection(self) -> None: + """Configure the connection to the ScyllaDB container. + + Determines the appropriate address to connect to the ScyllaDB container + based on the host platform. For Linux, it uses the container's IP address, + while for Windows, macOS, or WSL it uses localhost with the mapped port. + Creates a boto3 DynamoDB client configured to connect to ScyllaDB's + Alternator interface. + + Raises: + RuntimeError: If the ScyllaDB container is not available or if the IP address + cannot be detected + """ + if self._cfg.address == "": if self._storage_container is None: raise RuntimeError( "ScyllaDB container is not available! Make sure that you deployed " @@ -153,34 +222,53 @@ def configure_connection(self): if not self._cfg.address: self.logging.error( f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" + f"{json.dumps(self._storage_container.attrs, indent=2)}" ) raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" + f"Incorrect detection of IP address for container with id " + f"{self._cfg.instance_id}" ) self.logging.info("Starting ScyllaDB instance at {}".format(self._cfg.address)) - def stop(self): + # Create the DynamoDB client for ScyllaDB's Alternator interface + self.client = boto3.client( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{self._cfg.address}", + ) + + def stop(self) -> None: + """Stop the ScyllaDB container. + + Gracefully stops the running ScyllaDB container if it exists. 
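The readiness wait described above can be sketched in isolation as below: poll `nodetool status` inside the container until it succeeds or a timeout is reached. The image name and container options are placeholders; the real deployment passes CPU, memory, and Alternator settings.

```python
import time
import docker

client = docker.from_env()
container = client.containers.run("scylladb/scylla", detach=True)  # placeholder options

for _ in range(30):
    exit_code, out = container.exec_run("nodetool status")
    if exit_code == 0:
        print("ScyllaDB is ready")
        break
    time.sleep(1.0)
else:
    raise RuntimeError(f"ScyllaDB did not become ready: {out}")
```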
+ """ if self._storage_container is not None: self.logging.info(f"Stopping ScyllaDB container at {self._cfg.address}.") self._storage_container.stop() self.logging.info(f"Stopped ScyllaDB container at {self._cfg.address}.") else: - self.logging.error("Stopping ScyllaDB was not succesful, storage container not known!") + self.logging.error("Stopping ScyllaDB was not successful, storage container not known!") - def envs(self) -> dict: - return {"NOSQL_STORAGE_TYPE": "scylladb", "NOSQL_STORAGE_ENDPOINT": self._cfg.address} + def envs(self) -> Dict[str, str]: + """Generate environment variables for ScyllaDB configuration. - def serialize(self) -> Tuple[StorageType, dict]: - return StorageType.SCYLLADB, self._cfg.serialize() + Creates environment variables that can be used by benchmark functions + to connect to the ScyllaDB storage instance. - """ - This implementation supports overriding this class. - The main ScyllaDB class is used to start/stop deployments. + Returns: + Dict[str, str]: Environment variables for ScyllaDB connection + """ + return {"NOSQL_STORAGE_TYPE": "scylladb", "NOSQL_STORAGE_ENDPOINT": self._cfg.address} - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. - """ + def serialize(self) -> Tuple[StorageType, Dict[str, Any]]: + """Serialize ScyllaDB configuration to a tuple. + + Returns: + Tuple[StorageType, Dict[str, Any]]: Storage type and serialized configuration + """ + return StorageType.SCYLLADB, self._cfg.serialize() T = TypeVar("T", bound="ScyllaDB") @@ -188,6 +276,27 @@ def serialize(self) -> Tuple[StorageType, dict]: def _deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources, obj_type: Type[T] ) -> T: + """Deserialize a ScyllaDB instance from cached configuration with custom type. + + Creates a new instance of the specified class type from cached configuration + data. This allows platform-specific versions to be deserialized correctly + while sharing the core implementation. + + FIXME: is this still needed? It looks like we stopped using + platform-specific implementations. + + Args: + cached_config: Cached ScyllaDB configuration + cache_client: Cache client + resources: Resources configuration + obj_type: Type of object to create (a ScyllaDB subclass) + + Returns: + T: Deserialized instance of the specified type + + Raises: + RuntimeError: If the storage container does not exist + """ docker_client = docker.from_env() obj = obj_type(docker_client, cache_client, cached_config, resources) @@ -205,10 +314,32 @@ def _deserialize( def deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources ) -> "ScyllaDB": + """Deserialize a ScyllaDB instance from cached configuration. + + Creates a new ScyllaDB instance from cached configuration data. + + Args: + cached_config: Cached ScyllaDB configuration + cache_client: Cache client + resources: Resources configuration + + Returns: + ScyllaDB: Deserialized ScyllaDB instance + """ return ScyllaDB._deserialize(cached_config, cache_client, resources, ScyllaDB) def retrieve_cache(self, benchmark: str) -> bool: + """Retrieve cached table configuration for a benchmark. + + Checks if table configuration for the given benchmark is already loaded + in memory, and if not, attempts to load it from the cache. 
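Talking to the DynamoDB-compatible endpoint configured above can be done with a plain boto3 client, as in this sketch; the endpoint URL is a placeholder and the key values are ignored by Alternator.

```python
import boto3

client = boto3.client(
    "dynamodb",
    region_name="None",                 # required by boto3, unused by Alternator
    aws_access_key_id="None",
    aws_secret_access_key="None",
    endpoint_url="http://localhost:8000",
)
print(client.list_tables()["TableNames"])
```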
+ Args: + benchmark: Name of the benchmark + + Returns: + bool: True if table configuration was found, False otherwise + """ if benchmark in self._tables: return True @@ -219,8 +350,15 @@ def retrieve_cache(self, benchmark: str) -> bool: return False - def update_cache(self, benchmark: str): + def update_cache(self, benchmark: str) -> None: + """Update the cache with table configuration for a benchmark. + + Stores the table configuration for the specified benchmark in the cache + for future retrieval. + Args: + benchmark: Name of the benchmark + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, @@ -230,10 +368,26 @@ def update_cache(self, benchmark: str): ) def get_tables(self, benchmark: str) -> Dict[str, str]: + """Get the table name mappings for a benchmark. + + Args: + benchmark: Name of the benchmark + + Returns: + Dict[str, str]: Mapping from original table names to actual table names + """ return self._tables[benchmark] def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """Get the actual table name for a benchmark's logical table name. + Args: + benchmark: Name of the benchmark + table: Logical table name + + Returns: + Optional[str]: Actual table name or None if not found + """ if benchmark not in self._tables: return None @@ -246,11 +400,25 @@ def write_to_table( self, benchmark: str, table: str, - data: dict, + data: Dict[str, Any], primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, - ): - + ) -> None: + """Write data to a DynamoDB table in ScyllaDB. + + Serializes the data using DynamoDB type serialization and writes it + to the specified table with the provided primary and optional secondary keys. + + Args: + benchmark: Name of the benchmark + table: Logical table name + data: Data to write to the table + primary_key: Tuple of (key_name, key_value) for the primary key + secondary_key: Optional tuple of (key_name, key_value) for the secondary key + + Raises: + AssertionError: If the table name is not found + """ table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -261,21 +429,35 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ - def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """Create a DynamoDB table in ScyllaDB. + + Creates a new DynamoDB table with the specified primary key and optional + secondary key. The table name is constructed to be unique across benchmarks + and resource groups. + + Note: Unlike cloud providers with hierarchical database structures, + ScyllaDB requires unique table names at the cluster level. + + Note: PAY_PER_REQUEST billing mode has no effect here. 
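A self-contained sketch of the table-creation and write pattern used by `create_table()` and `write_to_table()`: values are converted with `TypeSerializer` before `put_item`. Table name and endpoint are placeholders.

```python
import boto3
from boto3.dynamodb.types import TypeSerializer

client = boto3.client("dynamodb", region_name="None",
                      aws_access_key_id="None", aws_secret_access_key="None",
                      endpoint_url="http://localhost:8000")

client.create_table(
    TableName="sebs-benchmarks-demo",
    AttributeDefinitions=[{"AttributeName": "key", "AttributeType": "S"}],
    KeySchema=[{"AttributeName": "key", "KeyType": "HASH"}],
    BillingMode="PAY_PER_REQUEST",      # accepted but has no effect on Alternator
)

serializer = TypeSerializer()
data = {"key": "run-1", "payload": {"size": 10}}
item = {k: serializer.serialize(v) for k, v in data.items()}
client.put_item(TableName="sebs-benchmarks-demo", Item=item)
```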
+ + Args: + benchmark: Name of the benchmark + name: Logical table name + primary_key: Name of the primary key attribute + secondary_key: Optional name of the secondary key attribute + + Returns: + str: The actual table name that was created + Raises: + RuntimeError: If table creation fails for unknown reasons + """ table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" try: - definitions = [{"AttributeName": primary_key, "AttributeType": "S"}] key_schema = [{"AttributeName": primary_key, "KeyType": "HASH"}] @@ -301,7 +483,6 @@ def create_table( return ret["TableDescription"]["TableName"] except self.client.exceptions.ResourceInUseException as e: - if "already exists" in e.response["Error"]["Message"]: self.logging.info( f"Using existing DynamoDB table {table_name} for benchmark {benchmark}" @@ -312,7 +493,29 @@ def create_table( raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}") def clear_table(self, name: str) -> str: + """Clear all data from a table. + + Args: + name: Name of the table to clear + + Returns: + str: Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """Remove a table completely. + + Args: + name: Name of the table to remove + + Returns: + str: Table name + + Raises: + NotImplementedError: This method is not yet implemented + """ raise NotImplementedError() diff --git a/sebs/types.py b/sebs/types.py index b87516fb..a6d3c38d 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -1,12 +1,40 @@ +"""Type definitions for the Serverless Benchmarking Suite. + +This module provides enum types used throughout the benchmarking suite +to represent different platforms, storage types, and benchmark modules. +These types are used for configuration, deployment, and resource management. +""" + from enum import Enum class BenchmarkModule(str, Enum): + """Types of benchmark modules. + + Benchmark modules indicate which additional packages and configuration + are needed for the benchmark to work correctly. + + - STORAGE: Object storage module for storing and retrieving files + - NOSQL: NoSQL database module for storing and retrieving structured data + """ + STORAGE = "storage" NOSQL = "nosql" class Platforms(str, Enum): + """Supported serverless platforms. + + This enum defines the different serverless platforms supported by + the benchmarking suite: + + - AWS: Amazon Web Services Lambda + - AZURE: Microsoft Azure Functions + - GCP: Google Cloud Platform Cloud Functions + - LOCAL: Local execution environment + - OPENWHISK: Apache OpenWhisk + """ + AWS = "aws" AZURE = "azure" GCP = "gcp" @@ -15,6 +43,17 @@ class Platforms(str, Enum): class Storage(str, Enum): + """Supported object storage services. + + This enum defines the different object storage services supported + by the benchmarking suite: + + - AWS_S3: Amazon Simple Storage Service (S3) + - AZURE_BLOB_STORAGE: Microsoft Azure Blob Storage + - GCP_STORAGE: Google Cloud Storage + - MINIO: MinIO object storage (local or self-hosted) + """ + AWS_S3 = "aws-s3" AZURE_BLOB_STORAGE = "azure-blob-storage" GCP_STORAGE = "google-cloud-storage" @@ -22,6 +61,17 @@ class Storage(str, Enum): class NoSQLStorage(str, Enum): + """Supported NoSQL database services. 
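Because the enums in this module derive from `(str, Enum)`, members compare equal to their string values and serialize cleanly to JSON. A small local illustration that mirrors `sebs.types.Storage`:

```python
import json
from enum import Enum

class Storage(str, Enum):
    AWS_S3 = "aws-s3"
    MINIO = "minio"

print(Storage.MINIO == "minio")                 # True: members behave like strings
print(json.dumps({"storage": Storage.MINIO}))   # {"storage": "minio"}
print(Storage("aws-s3") is Storage.AWS_S3)      # lookup by value returns the member
```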
+ + This enum defines the different NoSQL database services supported + by the benchmarking suite: + + - AWS_DYNAMODB: Amazon DynamoDB + - AZURE_COSMOSDB: Microsoft Azure Cosmos DB + - GCP_DATASTORE: Google Cloud Datastore + - SCYLLADB: ScyllaDB (compatible with Apache Cassandra) + """ + AWS_DYNAMODB = "aws-dynamodb" AZURE_COSMOSDB = "azure-cosmosdb" GCP_DATASTORE = "google-cloud-datastore" diff --git a/sebs/utils.py b/sebs/utils.py index e7ab43f6..69aebdbd 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -1,3 +1,14 @@ +""" +Utility functions and classes for the Serverless Benchmarking Suite (SeBs). + +This module provides common utilities used throughout the framework, including: +- File system operations and path management +- Process execution and command handling +- JSON serialization and data manipulation +- Logging configuration and utilities +- Platform detection functions +""" + import json import logging import os @@ -10,17 +21,46 @@ from typing import List, Optional +# Global constants PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir) DOCKER_DIR = os.path.join(PROJECT_DIR, "dockerfiles") PACK_CODE_APP = "pack_code_{}.sh" -def project_absolute_path(*paths: str): +def project_absolute_path(*paths: str) -> str: + """ + Join paths relative to the project root directory. + + Args: + *paths: Path components to join + + Returns: + str: Absolute path including the project directory + """ return os.path.join(PROJECT_DIR, *paths) class JSONSerializer(json.JSONEncoder): + """ + Custom JSON encoder for objects with serialize method. + + This encoder handles objects by: + 1. Using their serialize() method if available + 2. Converting dictionaries to strings + 3. Using vars() to get object attributes + 4. Falling back to string representation + """ + def default(self, o): + """ + Custom serialization for objects. + + Args: + o: Object to serialize + + Returns: + JSON serializable representation of the object + """ if hasattr(o, "serialize"): return o.serialize() elif isinstance(o, dict): @@ -33,14 +73,37 @@ def default(self, o): def serialize(obj) -> str: + """ + Serialize an object to a JSON string. + Applies `serialize` method when defined by the object. + + Args: + obj: Object to serialize + + Returns: + str: JSON string representation of the object + """ if hasattr(obj, "serialize"): return json.dumps(obj.serialize(), sort_keys=True, indent=2) else: return json.dumps(obj, cls=JSONSerializer, sort_keys=True, indent=2) -# Executing with shell provides options such as wildcard expansion -def execute(cmd, shell=False, cwd=None): +def execute(cmd, shell=False, cwd=None) -> str: + """ + Execute a shell command and capture its output, handling errors. + + Args: + cmd: Command to execute (string) + shell: Whether to use shell execution (enables wildcards, pipes, etc.) + cwd: Working directory for command execution + + Returns: + str: Command output as string + + Raises: + RuntimeError: If command execution fails + """ if not shell: cmd = cmd.split() ret = subprocess.run( @@ -53,7 +116,15 @@ def execute(cmd, shell=False, cwd=None): return ret.stdout.decode("utf-8") -def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]): +def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]) -> None: + """ + Update a nested dictionary with a value at the specified key path. 
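A self-contained illustration of the nested-dictionary helper documented above; the setdefault-style traversal of parent keys is assumed, since the middle of the function lies outside this hunk.

```python
from typing import List, Optional

def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]) -> None:
    if value is not None:
        for key in keys[:-1]:
            cfg = cfg.setdefault(key, {})   # create parent levels on demand
        cfg[keys[-1]] = value

settings: dict = {}
update_nested_dict(settings, ["deployment", "minio", "address"], "localhost:9011")
print(settings)   # {'deployment': {'minio': {'address': 'localhost:9011'}}}
```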
+ + Args: + cfg: Dictionary to update + keys: List of keys forming a path to the value + value: Value to set (skipped if None) + """ if value is not None: # make sure parent keys exist for key in keys[:-1]: @@ -61,7 +132,15 @@ def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]): cfg[keys[-1]] = value -def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]): +def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]) -> None: + """ + Append a dictionary to a nested location in another dictionary. + + Args: + cfg: Dictionary to update + keys: List of keys forming a path to the value + value: Dictionary to append (skipped if None or empty) + """ if value: # make sure parent keys exist for key in keys[:-1]: @@ -69,14 +148,35 @@ def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]): cfg[keys[-1]] = {**cfg[keys[-1]], **value} -def find(name, path): +def find(name: str, path: str) -> Optional[str]: + """ + Find a directory with the given name in the specified path. + + Args: + name: Directory name to find + path: Path to search in + + Returns: + str: Path to the found directory, or None if not found + """ for root, dirs, files in os.walk(path): if name in dirs: return os.path.join(root, name) return None -def create_output(directory, preserve_dir, verbose): +def create_output(directory: str, preserve_dir: bool, verbose: bool) -> str: + """ + Create or clean an output directory for benchmark results. + + Args: + directory: Path to create + preserve_dir: Whether to preserve existing directory + verbose: Verbosity level for logging + + Returns: + str: Absolute path to the output directory + """ output_dir = os.path.abspath(directory) if os.path.exists(output_dir) and not preserve_dir: shutil.rmtree(output_dir) @@ -87,8 +187,13 @@ def create_output(directory, preserve_dir, verbose): return output_dir -def configure_logging(): +def configure_logging() -> None: + """ + Configure global logging settings. + Reduces noise from third-party libraries by setting their log levels to ERROR. + This ensures that only important messages from these libraries are shown. 
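The intent of `configure_logging()` can be reproduced in a few lines: walk the registered loggers and silence the chatty third-party ones. The prefix list matches the libraries named in this module.

```python
import logging

for name in logging.root.manager.loggerDict:
    if name.startswith(("urllib3", "docker", "botocore")):
        logging.getLogger(name).setLevel(logging.ERROR)
```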
+ """ # disable information from libraries logging to decrease output noise loggers = ["urrlib3", "docker", "botocore"] for name in logging.root.manager.loggerDict: @@ -97,63 +202,54 @@ def configure_logging(): logging.getLogger(name).setLevel(logging.ERROR) -# def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): -# logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" -# logging_date_format = "%H:%M:%S" -# -# # default file log -# options = { -# "format": logging_format, -# "datefmt": logging_date_format, -# "level": logging.DEBUG if verbose else logging.INFO, -# } -# if output_dir: -# options = { -# **options, -# "filename": os.path.join(output_dir, "out.log"), -# "filemode": "w", -# } -# logging.basicConfig(**options) -# # Add stdout output -# if output_dir: -# stdout = logging.StreamHandler(sys.stdout) -# formatter = logging.Formatter(logging_format, logging_date_format) -# stdout.setFormatter(formatter) -# stdout.setLevel(logging.DEBUG if verbose else logging.INFO) -# logging.getLogger().addHandler(stdout) -# # disable information from libraries logging to decrease output noise -# for name in logging.root.manager.loggerDict: -# if ( -# name.startswith("urllib3") -# or name.startswith("docker") -# or name.startswith("botocore") -# ): -# logging.getLogger(name).setLevel(logging.ERROR) - +def find_benchmark(benchmark: str, path: str) -> Optional[str]: + """ + Locate directory corresponding to a benchmark in the repository. -""" - Locate directory corresponding to a benchmark in benchmarks - or benchmarks-data directory. - - :param benchmark: Benchmark name. - :param path: Path for lookup, relative to repository. - :return: relative path to directory corresponding to benchmark -""" + Searches for a benchmark directory in either the benchmarks or + benchmarks-data directories. + Args: + benchmark: Benchmark name + path: Path for lookup, relative to repository (usually 'benchmarks' or 'benchmarks-data') -def find_benchmark(benchmark: str, path: str): + Returns: + str: Path to benchmark directory, or None if not found + """ benchmarks_dir = os.path.join(PROJECT_DIR, path) benchmark_path = find(benchmark, benchmarks_dir) return benchmark_path -def global_logging(): +def global_logging() -> None: + """ + Set up basic global logging configuration. + + Configures the root logger with a standard format, timestamp, and INFO level. + This provides a baseline for all logging in the application. + """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" logging.basicConfig(format=logging_format, datefmt=logging_date_format, level=logging.INFO) class ColoredWrapper: + """ + Wrapper for logging with colored console output. + + This class provides formatted, colorized logging output for better readability + in terminal environments. It optionally propagates messages to the standard + Python logger. + + Attributes: + SUCCESS: Green color code for success messages + STATUS: Blue color code for status/info messages + WARNING: Yellow color code for warnings + ERROR: Red color code for errors + BOLD: Bold text formatting code + END: Code to reset text formatting + """ + SUCCESS = "\033[92m" STATUS = "\033[94m" WARNING = "\033[93m" @@ -162,38 +258,84 @@ class ColoredWrapper: END = "\033[0m" def __init__(self, prefix, logger, verbose=True, propagte=False): + """ + Initialize the colored logging wrapper. 
+ + Args: + prefix: Prefix for log messages (usually class name) + logger: Python logger to propagate to + verbose: Whether to show debug messages + propagte: Whether to propagate messages to the Python logger + """ self.verbose = verbose self.propagte = propagte self.prefix = prefix self._logging = logger def debug(self, message): + """ + Log a debug message. + + Args: + message: The message to log + """ if self.verbose: self._print(message, ColoredWrapper.STATUS) if self.propagte: self._logging.debug(message) def info(self, message): + """ + Log an informational message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.SUCCESS) if self.propagte: self._logging.info(message) def warning(self, message): + """ + Log a warning message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.WARNING) if self.propagte: self._logging.warning(message) def error(self, message): + """ + Log an error message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.ERROR) if self.propagte: self._logging.error(message) def critical(self, message): + """ + Log a critical error message. + + Args: + message: The message to log + """ self._print(message, ColoredWrapper.ERROR) if self.propagte: self._logging.critical(message) def _print(self, message, color): + """ + Print a formatted message to the console. + + Args: + message: The message to print + color: ANSI color code to use + """ timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f") click.echo( f"{color}{ColoredWrapper.BOLD}[{timestamp}]{ColoredWrapper.END} " @@ -202,7 +344,25 @@ def _print(self, message, color): class LoggingHandlers: + """ + Configures and manages logging handlers. + + This class sets up handlers for logging to files and tracks verbosity settings + for use with ColoredWrapper. + + Attributes: + handler: FileHandler for logging to a file + verbosity: Whether to include debug-level messages + """ + def __init__(self, verbose: bool = False, filename: Optional[str] = None): + """ + Initialize logging handlers. + + Args: + verbose: Whether to include debug-level messages + filename: Optional file to log to + """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" formatter = logging.Formatter(logging_format, logging_date_format) @@ -220,7 +380,25 @@ def __init__(self, verbose: bool = False, filename: Optional[str] = None): class LoggingBase: + """ + Base class providing consistent logging functionality across the framework. + + This class sets up a logger with a unique identifier and provides methods + for logging at different levels with consistent formatting. It supports + both console output with color coding and optional file logging. + + Attributes: + log_name: Unique identifier for this logger + logging: ColoredWrapper for formatted console output + """ + def __init__(self): + """ + Initialize the logging base with a unique identifier. + + Creates a unique name for the logger based on class name and a random ID, + then configures a standard logger and colored wrapper. + """ uuid_name = str(uuid.uuid4())[0:4] if hasattr(self, "typename"): self.log_name = f"{self.typename()}-{uuid_name}" @@ -233,16 +411,34 @@ def __init__(self): @property def logging(self) -> ColoredWrapper: + """ + Get the colored logging wrapper. + + Returns: + ColoredWrapper: The logging wrapper for this instance + """ # This would always print log with color. 
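A tiny demonstration of the ANSI formatting scheme that `ColoredWrapper._print()` relies on. The bold code value is an assumption here; the other codes match the class attributes listed above.

```python
import datetime

SUCCESS = "\033[92m"
BOLD = "\033[1m"      # assumed value of ColoredWrapper.BOLD
END = "\033[0m"

timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")
print(f"{SUCCESS}{BOLD}[{timestamp}]{END} {SUCCESS}Minio-1a2b: Created bucket demo{END}")
```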
And only if # filename in LoggingHandlers is set, it would log to file. return self.wrapper @property def logging_handlers(self) -> LoggingHandlers: + """ + Get the logging handlers configuration. + + Returns: + LoggingHandlers: The current handlers configuration + """ return self._logging_handlers @logging_handlers.setter def logging_handlers(self, handlers: LoggingHandlers): + """ + Set new logging handlers configuration. + + Args: + handlers: The new handlers configuration to use + """ self._logging_handlers = handlers self._logging.propagate = False @@ -258,21 +454,50 @@ def logging_handlers(self, handlers: LoggingHandlers): def has_platform(name: str) -> bool: + """ + Check if a specific platform is enabled via environment variable. + + Looks for SEBS_WITH_{name} environment variable set to 'true'. + + Args: + name: Platform name to check + + Returns: + bool: True if platform is enabled, False otherwise + """ return os.environ.get(f"SEBS_WITH_{name.upper()}", "False").lower() == "true" -# Check if the system is Linux and that it's not WSL def is_linux() -> bool: + """ + Check if the system is Linux and not Windows Subsystem for Linux. + + Returns: + bool: True if native Linux, False otherwise + """ return platform.system() == "Linux" and "microsoft" not in platform.release().lower() -def catch_interrupt(): +def catch_interrupt() -> None: + """ + Set up a signal handler to catch interrupt signals (Ctrl+C). + Prints a stack trace and exits when an interrupt is received. + This helps with debugging by showing the execution context at + the time of the interruption. + """ import signal import sys import traceback def handler(x, y): + """ + Handle interrupt signal by printing stack trace and exiting. + + Args: + x: Signal number + y: Frame object + """ traceback.print_stack() sys.exit(signal.SIGINT)
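Finally, the `SEBS_WITH_<PLATFORM>` switch checked by `has_platform()` can be exercised as follows; the environment variable value is set here only for the demonstration.

```python
import os

def has_platform(name: str) -> bool:
    return os.environ.get(f"SEBS_WITH_{name.upper()}", "False").lower() == "true"

os.environ["SEBS_WITH_OPENWHISK"] = "true"
print(has_platform("openwhisk"))   # True
print(has_platform("aws"))         # False unless SEBS_WITH_AWS=true is exported
```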