From c34f3b92d76892863054bebae79ba584baeef898 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 18 Jun 2025 23:17:16 +0000 Subject: [PATCH 01/17] add logs pipeline --- .../distro/aws_opentelemetry_configurator.py | 15 +- .../logs/aws_batch_log_record_processor.py | 160 ++++++++++++ .../otlp/aws/logs/otlp_aws_logs_exporter.py | 161 +++++++++++- .../otlp/aws/common/test_aws_auth_session.py | 63 +++++ .../aws_batch_log_record_processor_test.py | 236 ++++++++++++++++++ .../aws/logs/otlp_aws_logs_exporter_test.py | 180 +++++++++++++ 6 files changed, 810 insertions(+), 5 deletions(-) create mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index a08374bbe..b21bc6151 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -4,7 +4,7 @@ import os import re from logging import NOTSET, Logger, getLogger -from typing import ClassVar, Dict, List, Type, Union +from typing import ClassVar, Dict, List, Optional, Type, Union from importlib_metadata import version from typing_extensions import override @@ -22,6 +22,7 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder +from 
amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsBatchLogRecordProcessor from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter @@ -181,7 +182,9 @@ def _init_logging( # Provides a default OTLP log exporter when none is specified. # This is the behavior for the logs exporters for other languages. - if not exporters: + logs_exporter = os.environ.get("OTEL_LOGS_EXPORTER") + + if not exporters and logs_exporter and (logs_exporter.lower() != "none"): exporters = {"otlp": OTLPLogExporter} provider = LoggerProvider(resource=resource) @@ -190,7 +193,11 @@ def _init_logging( for _, exporter_class in exporters.items(): exporter_args: Dict[str, any] = {} log_exporter = _customize_logs_exporter(exporter_class(**exporter_args), resource) - provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) + + if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): + provider.add_log_record_processor(AwsBatchLogRecordProcessor(exporter=log_exporter)) + else: + provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) handler = LoggingHandler(level=NOTSET, logger_provider=provider) @@ -532,7 +539,7 @@ def _is_lambda_environment(): return AWS_LAMBDA_FUNCTION_NAME_CONFIG in os.environ -def _is_aws_otlp_endpoint(otlp_endpoint: str = None, service: str = "xray") -> bool: +def _is_aws_otlp_endpoint(otlp_endpoint: Optional[str] = None, service: str = "xray") -> bool: """Is the given endpoint an AWS OTLP endpoint?""" pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py new file mode 100644 index 000000000..8feada9a0 --- /dev/null +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -0,0 +1,160 @@ +import logging +from typing import Mapping, Optional, Sequence, cast + +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter +from opentelemetry.context import ( + _SUPPRESS_INSTRUMENTATION_KEY, + attach, + detach, + set_value, +) +from opentelemetry.sdk._logs import LogData +from opentelemetry.sdk._logs._internal.export import BatchLogExportStrategy +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.util.types import AnyValue + +_logger = logging.getLogger(__name__) + + +class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): + _BASE_LOG_BUFFER_BYTE_SIZE = ( + 2000 # Buffer size in bytes to account for log metadata not included in the body size calculation + ) + _MAX_LOG_REQUEST_BYTE_SIZE = ( + 1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html + ) + + def __init__( + self, + exporter: OTLPAwsLogExporter, + schedule_delay_millis: Optional[float] = None, + max_export_batch_size: Optional[int] = None, + export_timeout_millis: Optional[float] = None, + max_queue_size: Optional[int] = None, + ): + + super().__init__( + exporter=exporter, + schedule_delay_millis=schedule_delay_millis, + max_export_batch_size=max_export_batch_size, + export_timeout_millis=export_timeout_millis, + max_queue_size=max_queue_size, + ) + + self._exporter = exporter + + # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 + def _export(self, batch_strategy: BatchLogExportStrategy) -> None: + """ + Preserves existing batching behavior but will 
intermediarly export small log batches if + the size of the data in the batch is at orabove AWS CloudWatch's maximum request size limit of 1 MB. + + - Data size of exported batches will ALWAYS be <= 1 MB except for the case below: + - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1 + """ + with self._export_lock: + iteration = 0 + # We could see concurrent export calls from worker and force_flush. We call _should_export_batch + # once the lock is obtained to see if we still need to make the requested export. + while self._should_export_batch(batch_strategy, iteration): + iteration += 1 + token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True)) + try: + batch_length = min(self._max_export_batch_size, len(self._queue)) + batch_data_size = 0 + batch = [] + + for _ in range(batch_length): + log_data: LogData = self._queue.pop() + log_size = self._BASE_LOG_BUFFER_BYTE_SIZE + self._get_any_value_size(log_data.log_record.body) + + if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): + # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 + if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: + if self._is_gen_ai_log(batch[0]): + self._exporter.set_gen_ai_log_flag() + + self._exporter.export(batch) + batch_data_size = 0 + batch = [] + + batch_data_size += log_size + batch.append(log_data) + + if batch: + # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 + if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: + if self._is_gen_ai_log(batch[0]): + self._exporter.set_gen_ai_log_flag() + + self._exporter.export(batch) + except Exception as e: # pylint: disable=broad-exception-caught + _logger.exception("Exception while exporting logs: " + str(e)) + detach(token) + + def _get_any_value_size(self, val: AnyValue, depth: int = 3) -> int: + """ + Only used to indicate whether we should export a batch log size of 1 or not. 
+ Calculates the size in bytes of an AnyValue object. + Will processs complex AnyValue structures up to the specified depth limit. + If the depth limit of the AnyValue structure is exceeded, returns 0. + + Args: + val: The AnyValue object to calculate size for + depth: Maximum depth to traverse in nested structures (default: 3) + + Returns: + int: Total size of the AnyValue object in bytes + """ + # Use a stack to prevent excessive recursive calls. + stack = [(val, 0)] + size: int = 0 + + while stack: + # small optimization. We can stop calculating the size once it reaches the 1 MB limit. + if size >= self._MAX_LOG_REQUEST_BYTE_SIZE: + return size + + next_val, current_depth = stack.pop() + + if isinstance(next_val, (str, bytes)): + size += len(next_val) + continue + + if isinstance(next_val, bool): + size += 4 if next_val else 5 + continue + + if isinstance(next_val, (float, int)): + size += len(str(next_val)) + continue + + if current_depth <= depth: + if isinstance(next_val, Sequence): + for content in next_val: + stack.append((cast(AnyValue, content), current_depth + 1)) + + if isinstance(next_val, Mapping): + for key, content in next_val.items(): + size += len(key) + stack.append((content, current_depth + 1)) + else: + _logger.debug("Max log depth exceeded. Log data size will not be accurately calculated.") + return 0 + + return size + + @staticmethod + def _is_gen_ai_log(log_data: LogData) -> bool: + """ + Is the log a Gen AI log event? 
+ """ + gen_ai_instrumentations = { + "openinference.instrumentation.langchain", + "openinference.instrumentation.crewai", + "opentelemetry.instrumentation.langchain", + "crewai.telemetry", + "openlit.otel.tracing", + } + + return log_data.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 048632c06..64203b434 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -1,14 +1,41 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, Optional +import gzip +import logging +from io import BytesIO +from time import sleep +from typing import Dict, Optional, Sequence + +import requests from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession +from opentelemetry.exporter.otlp.proto.common._internal import ( + _create_exp_backoff_generator, +) +from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs from opentelemetry.exporter.otlp.proto.http import Compression from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter +from opentelemetry.sdk._logs import ( + LogData, +) +from opentelemetry.sdk._logs.export import ( + LogExportResult, +) + +_logger = logging.getLogger(__name__) class OTLPAwsLogExporter(OTLPLogExporter): + _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + _LARGE_GEN_AI_LOG_PATH_HEADER = ( + "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" + "['kvlistValue']['values'][*]['value']['kvlistValue']['values'][*]" + "['value']['arrayValue']['values'][*]['kvlistValue']['values'][*]" 
+ "['value']['stringValue']" + ) + _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + def __init__( self, endpoint: Optional[str] = None, @@ -18,6 +45,7 @@ def __init__( headers: Optional[Dict[str, str]] = None, timeout: Optional[int] = None, ): + self._gen_ai_log_flag = False self._aws_region = None if endpoint: @@ -34,3 +62,134 @@ def __init__( compression=Compression.Gzip, session=AwsAuthSession(aws_region=self._aws_region, service="logs"), ) + + # https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 + def export(self, batch: Sequence[LogData]) -> LogExportResult: + """ + Exports the given batch of OTLP log data. + Behaviors of how this export will work - + + 1. Always compresses the serialized data into gzip before sending. + + 2. If self._gen_ai_log_flag is enabled, the log data is > 1 MB a + and the assumption is that the log is a normalized gen.ai LogEvent. + - inject the {LARGE_LOG_HEADER} into the header. + + 3. 
Retry behavior is now the following: + - if the response contains a status code that is retryable and the response contains Retry-After in its + headers, the serialized data will be exported after that set delay + + - if the response does not contain that Retry-After header, default back to the current iteration of the + exponential backoff delay + """ + + if self._shutdown: + _logger.warning("Exporter already shutdown, ignoring batch") + return LogExportResult.FAILURE + + serialized_data = encode_logs(batch).SerializeToString() + + gzip_data = BytesIO() + with gzip.GzipFile(fileobj=gzip_data, mode="w") as gzip_stream: + gzip_stream.write(serialized_data) + + data = gzip_data.getvalue() + + backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + + while True: + resp = self._send(data) + + if resp.ok: + return LogExportResult.SUCCESS + + if not self._retryable(resp): + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) + self._gen_ai_log_flag = False + return LogExportResult.FAILURE + + # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) + + # Set the next retry delay to the value of the Retry-After response in the headers. + # If Retry-After is not present in the headers, default to the next iteration of the + # exponential backoff strategy. + + delay = self._parse_retryable_header(maybe_retry_after) + + if delay == -1: + delay = next(backoff, self._MAX_RETRY_TIMEOUT) + + if delay == self._MAX_RETRY_TIMEOUT: + _logger.error( + "Transient error %s encountered while exporting logs batch. " + "No Retry-After header found and all backoff retries exhausted. 
" + "Logs will not be exported.", + resp.reason, + ) + self._gen_ai_log_flag = False + return LogExportResult.FAILURE + + _logger.warning( + "Transient error %s encountered while exporting logs batch, retrying in %ss.", + resp.reason, + delay, + ) + + sleep(delay) + + def set_gen_ai_log_flag(self): + """ + Sets a flag that indicates the current log batch contains + a generative AI log record that exceeds the CloudWatch Logs size limit (1MB). + """ + self._gen_ai_log_flag = True + + def _send(self, serialized_data: bytes): + try: + response = self._session.post( + url=self._endpoint, + headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, + data=serialized_data, + verify=self._certificate_file, + timeout=self._timeout, + cert=self._client_cert, + ) + return response + except ConnectionError: + response = self._session.post( + url=self._endpoint, + headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, + data=serialized_data, + verify=self._certificate_file, + timeout=self._timeout, + cert=self._client_cert, + ) + return response + + @staticmethod + def _retryable(resp: requests.Response) -> bool: + """ + Is it a retryable response? 
+ """ + + return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) + + @staticmethod + def _parse_retryable_header(retry_header: Optional[str]) -> float: + """ + Converts the given retryable header into a delay in seconds, returns -1 if there's no header + or error with the parsing + """ + if not retry_header: + return -1 + + try: + val = float(retry_header) + return val if val >= 0 else -1 + except ValueError: + return -1 diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py new file mode 100644 index 000000000..e0c62b89d --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -0,0 +1,63 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from unittest import TestCase +from unittest.mock import patch + +import requests +from botocore.credentials import Credentials + +from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession + +AWS_OTLP_TRACES_ENDPOINT = "https://xray.us-east-1.amazonaws.com/v1/traces" +AWS_OTLP_LOGS_ENDPOINT = "https://logs.us-east-1.amazonaws.com/v1/logs" + +AUTHORIZATION_HEADER = "Authorization" +X_AMZ_DATE_HEADER = "X-Amz-Date" +X_AMZ_SECURITY_TOKEN_HEADER = "X-Amz-Security-Token" + +mock_credentials = Credentials(access_key="test_access_key", secret_key="test_secret_key", token="test_session_token") + + +class TestAwsAuthSession(TestCase): + @patch("pkg_resources.get_distribution", side_effect=ImportError("test error")) + @patch.dict("sys.modules", {"botocore": None}, clear=False) + @patch("requests.Session.request", return_value=requests.Response()) + def test_aws_auth_session_no_botocore(self, _, __): + """Tests that aws_auth_session will not inject SigV4 Headers if botocore 
is not installed.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=None) + def test_aws_auth_session_no_credentials(self, _, __): + """Tests that aws_auth_session will not inject SigV4 Headers if retrieving credentials returns None.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_aws_auth_session(self, _, __): + """Tests that aws_auth_session will inject SigV4 Headers if botocore is installed.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertIn(AUTHORIZATION_HEADER, actual_headers) + self.assertIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py new file mode 100644 index 000000000..1abf680f1 --- /dev/null +++ 
b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py @@ -0,0 +1,236 @@ +import time +import unittest +from typing import List +from unittest.mock import MagicMock, patch + +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsBatchLogRecordProcessor, + BatchLogExportStrategy, +) +from opentelemetry._logs.severity import SeverityNumber +from opentelemetry.sdk._logs import LogData, LogRecord +from opentelemetry.sdk._logs.export import LogExportResult +from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.trace import TraceFlags +from opentelemetry.util.types import AnyValue + + +class TestAwsBatchLogRecordProcessor(unittest.TestCase): + + def setUp(self): + self.mock_exporter = MagicMock() + self.mock_exporter.export.return_value = LogExportResult.SUCCESS + + self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter) + + def test_process_log_data_nested_structure(self): + """Tests that the processor correctly handles nested structures (dict/list)""" + message_size = 400 + depth = 2 + + nested_dict_log_body = self.generate_nested_log_body( + depth=depth, expected_body="X" * message_size, create_map=True + ) + nested_array_log_body = self.generate_nested_log_body( + depth=depth, expected_body="X" * message_size, create_map=False + ) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=depth) + array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=depth) + + # Asserting almost equal to account for key lengths in the Log object body + self.assertAlmostEqual(dict_size, message_size, delta=20) + self.assertAlmostEqual(array_size, message_size, delta=20) + + def test_process_log_data_nested_structure_exceeds_depth(self): + """Tests that the processor returns 0 for nested structure that exceeds the depth limit""" + message_size = 400 + log_body = "X" * 
message_size + + nested_dict_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=True) + nested_array_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=False) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=3) + array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=3) + + self.assertEqual(dict_size, 0) + self.assertEqual(array_size, 0) + + def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): + """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" + log_body = { + "smallKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 2), + "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1), + } + + nested_dict_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=True) + nested_array_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=False) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body) + array_size = self.processor._get_any_value_size(val=nested_array_log_body) + + self.assertAlmostEqual(dict_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + self.assertAlmostEqual(array_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + + def test_process_log_data_primitive(self): + + primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None] + expected_sizes = [4, 4, 1, 3, 4, 5, 0] + + for i in range(len(primitives)): + body = primitives[i] + expected_size = expected_sizes[i] + + actual_size = self.processor._get_any_value_size(body) + self.assertEqual(actual_size, expected_size) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + 
@patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_export_single_batch_under_size_limit(self, _, __, ___): + """Tests that export is only called once if a single batch is under the size limit""" + log_count = 10 + log_body = "test" + test_logs = self.generate_test_log_data(count=log_count, log_body=log_body) + total_data_size = 0 + + for log in test_logs: + size = self.processor._get_any_value_size(log.log_record.body) + total_data_size += size + self.processor._queue.appendleft(log) + + self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) + args, _ = self.mock_exporter.export.call_args + actual_batch = args[0] + + self.assertLess(total_data_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE) + self.assertEqual(len(self.processor._queue), 0) + self.assertEqual(len(actual_batch), log_count) + self.mock_exporter.export.assert_called_once() + self.mock_exporter.set_gen_ai_log_flag.assert_not_called() + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): + """Should make multiple export calls of batch size 1 to export logs of size > 1 MB. 
+ But should only call set_gen_ai_log_flag if it's a Gen AI log event.""" + + large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + non_gen_ai_test_logs = self.generate_test_log_data(count=3, log_body=large_log_body) + gen_ai_test_logs = [] + + gen_ai_scopes = [ + "openinference.instrumentation.langchain", + "openinference.instrumentation.crewai", + "opentelemetry.instrumentation.langchain", + "crewai.telemetry", + "openlit.otel.tracing", + ] + + for gen_ai_scope in gen_ai_scopes: + gen_ai_test_logs.extend( + self.generate_test_log_data( + count=1, log_body=large_log_body, instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0") + ) + ) + + test_logs = gen_ai_test_logs + non_gen_ai_test_logs + + for log in test_logs: + self.processor._queue.appendleft(log) + + self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) + + self.assertEqual(len(self.processor._queue), 0) + self.assertEqual(self.mock_exporter.export.call_count, 3 + len(gen_ai_test_logs)) + self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, len(gen_ai_test_logs)) + + batches = self.mock_exporter.export.call_args_list + + for batch in batches: + self.assertEqual(len(batch[0]), 1) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): + """Should make calls to export smaller sub-batch logs""" + large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0") + small_log_body = "X" * ( + int(self.processor._MAX_LOG_REQUEST_BYTE_SIZE / 10) - self.processor._BASE_LOG_BUFFER_BYTE_SIZE + ) + 
test_logs = self.generate_test_log_data(count=3, log_body=large_log_body, instrumentation_scope=gen_ai_scope) + # 1st, 2nd, 3rd batch = size 1 + # 4th batch = size 10 + # 5th batch = size 2 + small_logs = self.generate_test_log_data(count=12, log_body=small_log_body, instrumentation_scope=gen_ai_scope) + + test_logs.extend(small_logs) + + for log in test_logs: + self.processor._queue.appendleft(log) + + self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) + + self.assertEqual(len(self.processor._queue), 0) + self.assertEqual(self.mock_exporter.export.call_count, 5) + self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, 3) + + batches = self.mock_exporter.export.call_args_list + + expected_sizes = { + 0: 1, # 1st batch (index 1) should have 1 log + 1: 1, # 2nd batch (index 1) should have 1 log + 2: 1, # 3rd batch (index 2) should have 1 log + 3: 10, # 4th batch (index 3) should have 10 logs + 4: 2, # 5th batch (index 4) should have 2 logs + } + + for i, call in enumerate(batches): + batch = call[0][0] + expected_size = expected_sizes[i] + self.assertEqual(len(batch), expected_size) + + def generate_test_log_data( + self, log_body: AnyValue, count=5, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0") + ) -> List[LogData]: + logs = [] + for i in range(count): + record = LogRecord( + timestamp=int(time.time_ns()), + trace_id=int(f"0x{i + 1:032x}", 16), + span_id=int(f"0x{i + 1:016x}", 16), + trace_flags=TraceFlags(1), + severity_text="INFO", + severity_number=SeverityNumber.INFO, + body=log_body, + attributes={"test.attribute": f"value-{i + 1}"}, + ) + + log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) + logs.append(log_data) + + return logs + + @staticmethod + def generate_nested_log_body(depth=0, expected_body: AnyValue = "test", create_map=True): + if depth < 0: + return expected_body + + if create_map: + return { + "key": TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth 
- 1, expected_body, create_map) + } + + return [TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py new file mode 100644 index 000000000..9f6d84b32 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py @@ -0,0 +1,180 @@ +import time +from unittest import TestCase +from unittest.mock import patch + +import requests +from requests.structures import CaseInsensitiveDict + +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter +from opentelemetry._logs.severity import SeverityNumber +from opentelemetry.sdk._logs import LogData, LogRecord +from opentelemetry.sdk._logs.export import ( + LogExportResult, +) +from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.trace import TraceFlags + + +class TestOTLPAwsLogsExporter(TestCase): + _ENDPOINT = "https://logs.us-west-2.amazonaws.com/v1/logs" + good_response = requests.Response() + good_response.status_code = 200 + + non_retryable_response = requests.Response() + non_retryable_response.status_code = 404 + + retryable_response_no_header = requests.Response() + retryable_response_no_header.status_code = 429 + + retryable_response_header = requests.Response() + retryable_response_header.headers = CaseInsensitiveDict({"Retry-After": "10"}) + retryable_response_header.status_code = 503 + + retryable_response_bad_header = requests.Response() + retryable_response_bad_header.headers = CaseInsensitiveDict({"Retry-After": "-12"}) + retryable_response_bad_header.status_code = 503 + + def setUp(self): + self.logs = self.generate_test_log_data() + self.exporter = 
OTLPAwsLogExporter(endpoint=self._ENDPOINT) + + @patch("requests.Session.request", return_value=good_response) + def test_export_success(self, mock_request): + """Tests that the exporter always compresses the serialized logs with gzip before exporting.""" + result = self.exporter.export(self.logs) + + mock_request.assert_called_once() + + _, kwargs = mock_request.call_args + data = kwargs.get("data", None) + + self.assertEqual(result, LogExportResult.SUCCESS) + + # Gzip first 10 bytes are reserved for metadata headers: + # https://www.loc.gov/preservation/digital/formats/fdd/fdd000599.shtml?loclr=blogsig + self.assertIsNotNone(data) + self.assertTrue(len(data) >= 10) + self.assertEqual(data[0:2], b"\x1f\x8b") + + @patch("requests.Session.request", return_value=good_response) + def test_export_gen_ai_logs(self, mock_request): + """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" + + self.exporter.set_gen_ai_log_flag() + + result = self.exporter.export(self.logs) + + mock_request.assert_called_once() + + _, kwargs = mock_request.call_args + headers = kwargs.get("headers", None) + + self.assertEqual(result, LogExportResult.SUCCESS) + self.assertIsNotNone(headers) + self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) + self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) + + @patch("requests.Session.request", return_value=good_response) + def test_should_not_export_if_shutdown(self, mock_request): + """Tests that no export request is made if the exporter is shutdown.""" + self.exporter.shutdown() + result = self.exporter.export(self.logs) + + mock_request.assert_not_called() + self.assertEqual(result, LogExportResult.FAILURE) + + @patch("requests.Session.request", return_value=non_retryable_response) + def test_should_not_export_again_if_not_retryable(self, mock_request): + """Tests that only one export request is made if the response status code is 
non-retryable.""" + result = self.exporter.export(self.logs) + mock_request.assert_called_once() + + self.assertEqual(result, LogExportResult.FAILURE) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch("requests.Session.request", return_value=retryable_response_no_header) + def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): + """Tests that multiple export requests are made with exponential delay if the response status code is retryable. + But there is no Retry-After header.""" + result = self.exporter.export(self.logs) + + # 1, 2, 4, 8, 16, 32 delays + self.assertEqual(mock_sleep.call_count, 6) + + delays = mock_sleep.call_args_list + + for i in range(len(delays)): + self.assertEqual(delays[i][0][0], 2**i) + + # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) + self.assertEqual(mock_request.call_count, 7) + self.assertEqual(result, LogExportResult.FAILURE) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch( + "requests.Session.request", + side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], + ) + def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): + """Tests that multiple export requests are made with the server's suggested + delay if the response status code is retryable and there is a Retry-After header.""" + result = self.exporter.export(self.logs) + delays = mock_sleep.call_args_list + + for i in range(len(delays)): + self.assertEqual(delays[i][0][0], 10) + + self.assertEqual(mock_sleep.call_count, 3) + self.assertEqual(mock_request.call_count, 4) + self.assertEqual(result, LogExportResult.SUCCESS) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", 
side_effect=lambda x: None + ) + @patch( + "requests.Session.request", + side_effect=[ + retryable_response_bad_header, + retryable_response_bad_header, + retryable_response_bad_header, + good_response, + ], + ) + def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after_header( + self, mock_request, mock_sleep + ): + """Tests that multiple export requests are made with exponential delay if the response status code is retryable. + but the Retry-After header ins invalid or malformed.""" + result = self.exporter.export(self.logs) + delays = mock_sleep.call_args_list + + for i in range(len(delays)): + self.assertEqual(delays[i][0][0], 2**i) + + self.assertEqual(mock_sleep.call_count, 3) + self.assertEqual(mock_request.call_count, 4) + self.assertEqual(result, LogExportResult.SUCCESS) + + def generate_test_log_data(self, count=5): + logs = [] + for i in range(count): + record = LogRecord( + timestamp=int(time.time_ns()), + trace_id=int(f"0x{i + 1:032x}", 16), + span_id=int(f"0x{i + 1:016x}", 16), + trace_flags=TraceFlags(1), + severity_text="INFO", + severity_number=SeverityNumber.INFO, + body=f"Test log {i + 1}", + attributes={"test.attribute": f"value-{i + 1}"}, + ) + + log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) + + logs.append(log_data) + + return logs From 010e7dfe9721f2ede2eadf05aa260d5af6c8afc1 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:12:40 +0000 Subject: [PATCH 02/17] add logs pipeline --- aws-opentelemetry-distro/pyproject.toml | 108 ++-- .../distro/_aws_metric_attribute_generator.py | 2 +- .../distro/_aws_span_processing_util.py | 10 - .../src/amazon/opentelemetry/distro/_utils.py | 20 +- .../distro/aws_opentelemetry_configurator.py | 6 +- .../logs/aws_batch_log_record_processor.py | 118 ++-- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 53 +- .../metrics/aws_cloudwatch_emf_exporter.py | 523 +++++++++++++++++ .../distro/patches/_bedrock_patches.py | 224 
+------ .../distro/patches/_botocore_patches.py | 26 +- .../otlp/aws/common/test_aws_auth_session.py | 23 +- ...=> test_aws_batch_log_record_processor.py} | 176 ++++-- ...test.py => test_otlp_aws_logs_exporter.py} | 26 +- .../test_aws_cloudwatch_emf_exporter.py | 547 ++++++++++++++++++ .../traces}/test_otlp_aws_span_exporter.py | 0 .../distro/test_aws_auth_session.py | 63 -- .../test_aws_metric_attribute_generator.py | 2 +- .../test_aws_opentelementry_configurator.py | 17 + .../distro/test_aws_opentelemetry_distro.py | 7 +- .../distro/test_instrumentation_patch.py | 306 ++-------- .../amazon/opentelemetry/distro/test_utils.py | 96 +++ .../applications/botocore/botocore_server.py | 28 +- .../applications/botocore/requirements.txt | 2 - .../applications/django/requirements.txt | 2 - .../mysql-connector/requirements.txt | 2 - .../applications/mysqlclient/requirements.txt | 2 - .../applications/psycopg2/requirements.txt | 2 - .../applications/pymysql/requirements.txt | 2 - .../applications/requests/requirements.txt | 2 - .../images/mock-collector/pyproject.toml | 6 +- .../images/mock-collector/requirements.txt | 6 +- contract-tests/tests/pyproject.toml | 4 +- .../test/amazon/botocore/botocore_test.py | 41 +- 33 files changed, 1611 insertions(+), 841 deletions(-) create mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{aws_batch_log_record_processor_test.py => test_aws_batch_log_record_processor.py} (54%) rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{otlp_aws_logs_exporter_test.py => test_otlp_aws_logs_exporter.py} (88%) create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/{ => 
exporter/otlp/aws/traces}/test_otlp_aws_span_exporter.py (100%) delete mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py diff --git a/aws-opentelemetry-distro/pyproject.toml b/aws-opentelemetry-distro/pyproject.toml index 3d8eadbc1..f8984854d 100644 --- a/aws-opentelemetry-distro/pyproject.toml +++ b/aws-opentelemetry-distro/pyproject.toml @@ -24,62 +24,62 @@ classifiers = [ ] dependencies = [ - "opentelemetry-api == 1.27.0", - "opentelemetry-sdk == 1.27.0", - "opentelemetry-exporter-otlp-proto-grpc == 1.27.0", - "opentelemetry-exporter-otlp-proto-http == 1.27.0", - "opentelemetry-propagator-b3 == 1.27.0", - "opentelemetry-propagator-jaeger == 1.27.0", - "opentelemetry-exporter-otlp-proto-common == 1.27.0", + "opentelemetry-api == 1.33.1", + "opentelemetry-sdk == 1.33.1", + "opentelemetry-exporter-otlp-proto-grpc == 1.33.1", + "opentelemetry-exporter-otlp-proto-http == 1.33.1", + "opentelemetry-propagator-b3 == 1.33.1", + "opentelemetry-propagator-jaeger == 1.33.1", + "opentelemetry-exporter-otlp-proto-common == 1.33.1", "opentelemetry-sdk-extension-aws == 2.0.2", "opentelemetry-propagator-aws-xray == 1.0.1", - "opentelemetry-distro == 0.48b0", - "opentelemetry-processor-baggage == 0.48b0", - "opentelemetry-propagator-ot-trace == 0.48b0", - "opentelemetry-instrumentation == 0.48b0", - "opentelemetry-instrumentation-aws-lambda == 0.48b0", - "opentelemetry-instrumentation-aio-pika == 0.48b0", - "opentelemetry-instrumentation-aiohttp-client == 0.48b0", - "opentelemetry-instrumentation-aiopg == 0.48b0", - "opentelemetry-instrumentation-asgi == 0.48b0", - "opentelemetry-instrumentation-asyncpg == 0.48b0", - "opentelemetry-instrumentation-boto == 0.48b0", - "opentelemetry-instrumentation-boto3sqs == 0.48b0", - "opentelemetry-instrumentation-botocore == 0.48b0", - "opentelemetry-instrumentation-celery == 0.48b0", - 
"opentelemetry-instrumentation-confluent-kafka == 0.48b0", - "opentelemetry-instrumentation-dbapi == 0.48b0", - "opentelemetry-instrumentation-django == 0.48b0", - "opentelemetry-instrumentation-elasticsearch == 0.48b0", - "opentelemetry-instrumentation-falcon == 0.48b0", - "opentelemetry-instrumentation-fastapi == 0.48b0", - "opentelemetry-instrumentation-flask == 0.48b0", - "opentelemetry-instrumentation-grpc == 0.48b0", - "opentelemetry-instrumentation-httpx == 0.48b0", - "opentelemetry-instrumentation-jinja2 == 0.48b0", - "opentelemetry-instrumentation-kafka-python == 0.48b0", - "opentelemetry-instrumentation-logging == 0.48b0", - "opentelemetry-instrumentation-mysql == 0.48b0", - "opentelemetry-instrumentation-mysqlclient == 0.48b0", - "opentelemetry-instrumentation-pika == 0.48b0", - "opentelemetry-instrumentation-psycopg2 == 0.48b0", - "opentelemetry-instrumentation-pymemcache == 0.48b0", - "opentelemetry-instrumentation-pymongo == 0.48b0", - "opentelemetry-instrumentation-pymysql == 0.48b0", - "opentelemetry-instrumentation-pyramid == 0.48b0", - "opentelemetry-instrumentation-redis == 0.48b0", - "opentelemetry-instrumentation-remoulade == 0.48b0", - "opentelemetry-instrumentation-requests == 0.48b0", - "opentelemetry-instrumentation-sqlalchemy == 0.48b0", - "opentelemetry-instrumentation-sqlite3 == 0.48b0", - "opentelemetry-instrumentation-starlette == 0.48b0", - "opentelemetry-instrumentation-system-metrics == 0.48b0", - "opentelemetry-instrumentation-tornado == 0.48b0", - "opentelemetry-instrumentation-tortoiseorm == 0.48b0", - "opentelemetry-instrumentation-urllib == 0.48b0", - "opentelemetry-instrumentation-urllib3 == 0.48b0", - "opentelemetry-instrumentation-wsgi == 0.48b0", - "opentelemetry-instrumentation-cassandra == 0.48b0", + "opentelemetry-distro == 0.54b1", + "opentelemetry-processor-baggage == 0.54b1", + "opentelemetry-propagator-ot-trace == 0.54b1", + "opentelemetry-instrumentation == 0.54b1", + "opentelemetry-instrumentation-aws-lambda == 
0.54b1", + "opentelemetry-instrumentation-aio-pika == 0.54b1", + "opentelemetry-instrumentation-aiohttp-client == 0.54b1", + "opentelemetry-instrumentation-aiopg == 0.54b1", + "opentelemetry-instrumentation-asgi == 0.54b1", + "opentelemetry-instrumentation-asyncpg == 0.54b1", + "opentelemetry-instrumentation-boto == 0.54b1", + "opentelemetry-instrumentation-boto3sqs == 0.54b1", + "opentelemetry-instrumentation-botocore == 0.54b1", + "opentelemetry-instrumentation-celery == 0.54b1", + "opentelemetry-instrumentation-confluent-kafka == 0.54b1", + "opentelemetry-instrumentation-dbapi == 0.54b1", + "opentelemetry-instrumentation-django == 0.54b1", + "opentelemetry-instrumentation-elasticsearch == 0.54b1", + "opentelemetry-instrumentation-falcon == 0.54b1", + "opentelemetry-instrumentation-fastapi == 0.54b1", + "opentelemetry-instrumentation-flask == 0.54b1", + "opentelemetry-instrumentation-grpc == 0.54b1", + "opentelemetry-instrumentation-httpx == 0.54b1", + "opentelemetry-instrumentation-jinja2 == 0.54b1", + "opentelemetry-instrumentation-kafka-python == 0.54b1", + "opentelemetry-instrumentation-logging == 0.54b1", + "opentelemetry-instrumentation-mysql == 0.54b1", + "opentelemetry-instrumentation-mysqlclient == 0.54b1", + "opentelemetry-instrumentation-pika == 0.54b1", + "opentelemetry-instrumentation-psycopg2 == 0.54b1", + "opentelemetry-instrumentation-pymemcache == 0.54b1", + "opentelemetry-instrumentation-pymongo == 0.54b1", + "opentelemetry-instrumentation-pymysql == 0.54b1", + "opentelemetry-instrumentation-pyramid == 0.54b1", + "opentelemetry-instrumentation-redis == 0.54b1", + "opentelemetry-instrumentation-remoulade == 0.54b1", + "opentelemetry-instrumentation-requests == 0.54b1", + "opentelemetry-instrumentation-sqlalchemy == 0.54b1", + "opentelemetry-instrumentation-sqlite3 == 0.54b1", + "opentelemetry-instrumentation-starlette == 0.54b1", + "opentelemetry-instrumentation-system-metrics == 0.54b1", + "opentelemetry-instrumentation-tornado == 0.54b1", + 
"opentelemetry-instrumentation-tortoiseorm == 0.54b1", + "opentelemetry-instrumentation-urllib == 0.54b1", + "opentelemetry-instrumentation-urllib3 == 0.54b1", + "opentelemetry-instrumentation-wsgi == 0.54b1", + "opentelemetry-instrumentation-cassandra == 0.54b1", ] [project.optional-dependencies] diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py index ec5b693ed..173f8492b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py @@ -35,7 +35,6 @@ ) from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute from amazon.opentelemetry.distro._aws_span_processing_util import ( - GEN_AI_REQUEST_MODEL, LOCAL_ROOT, MAX_KEYWORD_LENGTH, SQL_KEYWORD_PATTERN, @@ -60,6 +59,7 @@ from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL from opentelemetry.semconv.trace import SpanAttributes # Pertinent OTEL attribute keys diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py index 21e19afa9..d2a039861 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py @@ -26,16 +26,6 @@ # Max keyword length supported by parsing into remote_operation from DB_STATEMENT MAX_KEYWORD_LENGTH = 27 -# TODO: Use Semantic Conventions once upgrade to 0.47b0 -GEN_AI_REQUEST_MODEL: str = 
"gen_ai.request.model" -GEN_AI_SYSTEM: str = "gen_ai.system" -GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens" -GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature" -GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p" -GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons" -GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens" -GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens" - # Get dialect keywords retrieved from dialect_keywords.json file. # Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index 149f9ad29..fa5acf42c 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import os -import sys +from importlib.metadata import PackageNotFoundError, version from logging import Logger, getLogger -import pkg_resources +from packaging.requirements import Requirement _logger: Logger = getLogger(__name__) @@ -14,15 +14,21 @@ def is_installed(req: str) -> bool: """Is the given required package installed?""" - - if req in sys.modules and sys.modules[req] is not None: - return True + req = Requirement(req) try: - pkg_resources.get_distribution(req) - except Exception as exc: # pylint: disable=broad-except + dist_version = version(req.name) + except PackageNotFoundError as exc: _logger.debug("Skipping instrumentation patch: package %s, exception: %s", req, exc) return False + + if not list(req.specifier.filter([dist_version])): + _logger.debug( + "instrumentation for package %s is available but version %s is installed. 
Skipping.", + req, + dist_version, + ) + return False return True diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index b21bc6151..e39c916c5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -180,11 +180,11 @@ def _init_logging( resource: Resource = None, ): - # Provides a default OTLP log exporter when none is specified. + # Provides a default OTLP log exporter when it's not set. # This is the behavior for the logs exporters for other languages. - logs_exporter = os.environ.get("OTEL_LOGS_EXPORTER") + logs_exporters = os.environ.get("OTEL_LOGS_EXPORTER") - if not exporters and logs_exporter and (logs_exporter.lower() != "none"): + if not exporters and logs_exporters and logs_exporters.lower() != "none": exporters = {"otlp": OTLPLogExporter} provider = LoggerProvider(resource=resource) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 8feada9a0..e57b03f3f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -1,13 +1,11 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + import logging -from typing import Mapping, Optional, Sequence, cast +from typing import List, Mapping, Optional, Sequence, cast from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter -from opentelemetry.context import ( - _SUPPRESS_INSTRUMENTATION_KEY, - attach, - detach, - set_value, -) +from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value from opentelemetry.sdk._logs import LogData from opentelemetry.sdk._logs._internal.export import BatchLogExportStrategy from opentelemetry.sdk._logs.export import BatchLogRecordProcessor @@ -18,7 +16,7 @@ class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): _BASE_LOG_BUFFER_BYTE_SIZE = ( - 2000 # Buffer size in bytes to account for log metadata not included in the body size calculation + 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) _MAX_LOG_REQUEST_BYTE_SIZE = ( 1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html @@ -66,7 +64,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: for _ in range(batch_length): log_data: LogData = self._queue.pop() - log_size = self._BASE_LOG_BUFFER_BYTE_SIZE + self._get_any_value_size(log_data.log_record.body) + log_size = self._estimate_log_size(log_data) if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 @@ -88,64 +86,74 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: self._exporter.set_gen_ai_log_flag() self._exporter.export(batch) - except Exception as e: # pylint: disable=broad-exception-caught - _logger.exception("Exception while exporting logs: " + str(e)) + except Exception as exception: # pylint: disable=broad-exception-caught + _logger.exception("Exception while exporting logs: " + str(exception)) 
detach(token) - def _get_any_value_size(self, val: AnyValue, depth: int = 3) -> int: + def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: """ - Only used to indicate whether we should export a batch log size of 1 or not. - Calculates the size in bytes of an AnyValue object. - Will processs complex AnyValue structures up to the specified depth limit. - If the depth limit of the AnyValue structure is exceeded, returns 0. + Estimates the size in bytes of a log by calculating the size of its body and its attributes + and adding a buffer amount to account for other log metadata information. + Will process complex log structures up to the specified depth limit. + If the depth limit of the log structure is exceeded, returns truncates calculation + to everything up to that point. Args: - val: The AnyValue object to calculate size for + log: The Log object to calculate size for depth: Maximum depth to traverse in nested structures (default: 3) Returns: - int: Total size of the AnyValue object in bytes + int: The estimated size of the log object in bytes """ - # Use a stack to prevent excessive recursive calls. - stack = [(val, 0)] - size: int = 0 - - while stack: - # small optimization. We can stop calculating the size once it reaches the 1 MB limit. - if size >= self._MAX_LOG_REQUEST_BYTE_SIZE: - return size - - next_val, current_depth = stack.pop() - - if isinstance(next_val, (str, bytes)): - size += len(next_val) - continue - - if isinstance(next_val, bool): - size += 4 if next_val else 5 - continue - - if isinstance(next_val, (float, int)): - size += len(str(next_val)) - continue - - if current_depth <= depth: - if isinstance(next_val, Sequence): - for content in next_val: - stack.append((cast(AnyValue, content), current_depth + 1)) - - if isinstance(next_val, Mapping): - for key, content in next_val.items(): - size += len(key) - stack.append((content, current_depth + 1)) - else: - _logger.debug("Max log depth exceeded. 
Log data size will not be accurately calculated.") - return 0 + + # Use a queue to prevent excessive recursive calls. + # We calculate based on the size of the log record body and attributes for the log. + queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)] + + size: int = self._BASE_LOG_BUFFER_BYTE_SIZE + + while queue: + new_queue: List[tuple[AnyValue, int]] = [] + + for data in queue: + # small optimization, can stop calculating the size once it reaches the 1 MB limit. + if size >= self._MAX_LOG_REQUEST_BYTE_SIZE: + return size + + next_val, current_depth = data + + if isinstance(next_val, (str, bytes)): + size += len(next_val) + continue + + if isinstance(next_val, bool): + size += 4 if next_val else 5 + continue + + if isinstance(next_val, (float, int)): + size += len(str(next_val)) + continue + + if current_depth <= depth: + if isinstance(next_val, Sequence): + for content in next_val: + new_queue.append((cast(AnyValue, content), current_depth + 1)) + + if isinstance(next_val, Mapping): + for key, content in next_val.items(): + size += len(key) + new_queue.append((content, current_depth + 1)) + else: + _logger.debug( + f"Max log depth of {depth} exceeded. Log data size will not be accurately calculated." + ) + + queue = new_queue return size @staticmethod - def _is_gen_ai_log(log_data: LogData) -> bool: + def _is_gen_ai_log(log: LogData) -> bool: """ Is the log a Gen AI log event? 
""" @@ -157,4 +165,4 @@ def _is_gen_ai_log(log_data: LogData) -> bool: "openlit.otel.tracing", } - return log_data.instrumentation_scope.name in gen_ai_instrumentations + return log.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 64203b434..9bd75d03f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -10,30 +10,53 @@ import requests from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession -from opentelemetry.exporter.otlp.proto.common._internal import ( - _create_exp_backoff_generator, -) +from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs from opentelemetry.exporter.otlp.proto.http import Compression from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter -from opentelemetry.sdk._logs import ( - LogData, -) -from opentelemetry.sdk._logs.export import ( - LogExportResult, -) +from opentelemetry.sdk._logs import LogData +from opentelemetry.sdk._logs.export import LogExportResult _logger = logging.getLogger(__name__) class OTLPAwsLogExporter(OTLPLogExporter): - _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + """ + Below is the protobuf-JSON formatted path to "content" and "role" for the + following GenAI Consolidated Log Event Schema: + + "body": { + "output": { + "messages": [ + { + "content": "hi", + "role": "assistant" + } + ] + }, + "input": { + "messages": [ + { + "content": "hello", + "role": "user" + } + ] + } + } + + """ + _LARGE_GEN_AI_LOG_PATH_HEADER = ( - 
"\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" - "['kvlistValue']['values'][*]['value']['kvlistValue']['values'][*]" - "['value']['arrayValue']['values'][*]['kvlistValue']['values'][*]" - "['value']['stringValue']" + "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" # body + "['kvlistValue']['values'][*]['value']" # body['output'], body['input'] + "['kvlistValue']['values'][0]['value']" # body['output']['messages'], body['input']['messages'] + "['arrayValue']['values'][*]" # body['output']['messages'][0..999], body['input']['messages'][0..999] + "['kvlistValue']['values'][*]['value']['stringValue']" # body['output']['messages'][0..999]['content'/'role'], + # body['input']['messages'][0..999]['content'/'role'] ) + + _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling def __init__( @@ -160,7 +183,7 @@ def _send(self, serialized_data: bytes): cert=self._client_cert, ) return response - except ConnectionError: + except requests.exceptions.ConnectionError: response = self._session.post( url=self._endpoint, headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py new file mode 100644 index 000000000..e2e364b03 --- /dev/null +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py @@ -0,0 +1,523 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=no-self-use + +import json +import logging +import time +import uuid +from collections import defaultdict +from typing import Any, Dict, List, Optional, Tuple + +import botocore.session +from botocore.exceptions import ClientError + +from opentelemetry.sdk.metrics import ( + Counter, + Histogram, + ObservableCounter, + ObservableGauge, + ObservableUpDownCounter, + UpDownCounter, +) +from opentelemetry.sdk.metrics._internal.point import Metric +from opentelemetry.sdk.metrics.export import ( + AggregationTemporality, + Gauge, + MetricExporter, + MetricExportResult, + MetricsData, + NumberDataPoint, +) +from opentelemetry.sdk.resources import Resource +from opentelemetry.util.types import Attributes + +logger = logging.getLogger(__name__) + + +class MetricRecord: + """The metric data unified representation of all OTel metrics for OTel to CW EMF conversion.""" + + def __init__(self, metric_name: str, metric_unit: str, metric_description: str): + """ + Initialize metric record. + + Args: + metric_name: Name of the metric + metric_unit: Unit of the metric + metric_description: Description of the metric + """ + # Instrument metadata + self.name = metric_name + self.unit = metric_unit + self.description = metric_description + + # Will be set by conversion methods + self.timestamp: Optional[int] = None + self.attributes: Attributes = {} + + # Different metric type data - only one will be set per record + self.value: Optional[float] = None + self.sum_data: Optional[Any] = None + self.histogram_data: Optional[Any] = None + self.exp_histogram_data: Optional[Any] = None + + +class AwsCloudWatchEmfExporter(MetricExporter): + """ + OpenTelemetry metrics exporter for CloudWatch EMF format. + + This exporter converts OTel metrics into CloudWatch EMF logs which are then + sent to CloudWatch Logs. CloudWatch Logs automatically extracts the metrics + from the EMF logs. 
+ + https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html + + """ + + # CloudWatch EMF supported units + # Ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html + EMF_SUPPORTED_UNITS = { + "Seconds", + "Microseconds", + "Milliseconds", + "Bytes", + "Kilobytes", + "Megabytes", + "Gigabytes", + "Terabytes", + "Bits", + "Kilobits", + "Megabits", + "Gigabits", + "Terabits", + "Percent", + "Count", + "Bytes/Second", + "Kilobytes/Second", + "Megabytes/Second", + "Gigabytes/Second", + "Terabytes/Second", + "Bits/Second", + "Kilobits/Second", + "Megabits/Second", + "Gigabits/Second", + "Terabits/Second", + "Count/Second", + "None", + } + + # OTel to CloudWatch unit mapping + # Ref: opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/grouped_metric.go#L188 + UNIT_MAPPING = { + "1": "", + "ns": "", + "ms": "Milliseconds", + "s": "Seconds", + "us": "Microseconds", + "By": "Bytes", + "bit": "Bits", + } + + def __init__( + self, + namespace: str = "default", + log_group_name: str = None, + log_stream_name: Optional[str] = None, + aws_region: Optional[str] = None, + preferred_temporality: Optional[Dict[type, AggregationTemporality]] = None, + **kwargs, + ): + """ + Initialize the CloudWatch EMF exporter. 
+ + Args: + namespace: CloudWatch namespace for metrics + log_group_name: CloudWatch log group name + log_stream_name: CloudWatch log stream name (auto-generated if None) + aws_region: AWS region (auto-detected if None) + preferred_temporality: Optional dictionary mapping instrument types to aggregation temporality + **kwargs: Additional arguments passed to botocore client + """ + # Set up temporality preference default to DELTA if customers not set + if preferred_temporality is None: + preferred_temporality = { + Counter: AggregationTemporality.DELTA, + Histogram: AggregationTemporality.DELTA, + ObservableCounter: AggregationTemporality.DELTA, + ObservableGauge: AggregationTemporality.DELTA, + ObservableUpDownCounter: AggregationTemporality.DELTA, + UpDownCounter: AggregationTemporality.DELTA, + } + + super().__init__(preferred_temporality) + + self.namespace = namespace + self.log_group_name = log_group_name + self.log_stream_name = log_stream_name or self._generate_log_stream_name() + + session = botocore.session.Session() + self.logs_client = session.create_client("logs", region_name=aws_region, **kwargs) + + # Ensure log group exists + self._ensure_log_group_exists() + + # Ensure log stream exists + self._ensure_log_stream_exists() + + # Default to unique log stream name matching OTel Collector + # EMF Exporter behavior with language for source identification + def _generate_log_stream_name(self) -> str: + """Generate a unique log stream name.""" + + unique_id = str(uuid.uuid4())[:8] + return f"otel-python-{unique_id}" + + def _ensure_log_group_exists(self): + """Ensure the log group exists, create if it doesn't.""" + try: + self.logs_client.create_log_group(logGroupName=self.log_group_name) + logger.info("Created log group: %s", self.log_group_name) + except ClientError as error: + if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": + logger.debug("Log group %s already exists", self.log_group_name) + else: + 
logger.error("Failed to create log group %s : %s", self.log_group_name, error) + raise + + def _ensure_log_stream_exists(self): + try: + self.logs_client.create_log_stream(logGroupName=self.log_group_name, logStreamName=self.log_stream_name) + logger.info("Created log stream: %s", self.log_stream_name) + except ClientError as error: + if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": + logger.debug("Log stream %s already exists", self.log_stream_name) + else: + logger.error("Failed to create log stream %s : %s", self.log_group_name, error) + raise + + def _get_metric_name(self, record: MetricRecord) -> Optional[str]: + """Get the metric name from the metric record or data point.""" + + try: + if record.name: + return record.name + except AttributeError: + pass + # Return None if no valid metric name found + return None + + def _get_unit(self, record: MetricRecord) -> Optional[str]: + """Get CloudWatch unit from MetricRecord unit.""" + unit = record.unit + + if not unit: + return None + + # First check if unit is already a supported EMF unit + if unit in self.EMF_SUPPORTED_UNITS: + return unit + + # Map from OTel unit to CloudWatch unit + mapped_unit = self.UNIT_MAPPING.get(unit) + + return mapped_unit + + def _get_dimension_names(self, attributes: Attributes) -> List[str]: + """Extract dimension names from attributes.""" + # Implement dimension selection logic + # For now, use all attributes as dimensions + return list(attributes.keys()) + + def _get_attributes_key(self, attributes: Attributes) -> str: + """ + Create a hashable key from attributes for grouping metrics. 
+ + Args: + attributes: The attributes dictionary + + Returns: + A string representation of sorted attributes key-value pairs + """ + # Sort the attributes to ensure consistent keys + sorted_attrs = sorted(attributes.items()) + # Create a string representation of the attributes + return str(sorted_attrs) + + def _normalize_timestamp(self, timestamp_ns: int) -> int: + """ + Normalize a nanosecond timestamp to milliseconds for CloudWatch. + + Args: + timestamp_ns: Timestamp in nanoseconds + + Returns: + Timestamp in milliseconds + """ + # Convert from nanoseconds to milliseconds + return timestamp_ns // 1_000_000 + + def _create_metric_record(self, metric_name: str, metric_unit: str, metric_description: str) -> MetricRecord: + """ + Creates the intermediate metric data structure that standardizes different otel metric representation + and will be used to generate EMF events. The base record + establishes the instrument schema (name/unit/description) that will be populated + with dimensions, timestamps, and values during metric processing. + + Args: + metric_name: Name of the metric + metric_unit: Unit of the metric + metric_description: Description of the metric + + Returns: + A MetricRecord object + """ + return MetricRecord(metric_name, metric_unit, metric_description) + + def _convert_gauge(self, metric: Metric, data_point: NumberDataPoint) -> MetricRecord: + """Convert a Gauge metric datapoint to a metric record. 
+ + Args: + metric: The metric object + data_point: The datapoint to convert + + Returns: + MetricRecord with populated timestamp, attributes, and value + """ + # Create base record + record = self._create_metric_record(metric.name, metric.unit, metric.description) + + # Set timestamp + try: + timestamp_ms = ( + self._normalize_timestamp(data_point.time_unix_nano) + if data_point.time_unix_nano is not None + else int(time.time() * 1000) + ) + except AttributeError: + # data_point doesn't have time_unix_nano attribute + timestamp_ms = int(time.time() * 1000) + record.timestamp = timestamp_ms + + # Set attributes + try: + record.attributes = data_point.attributes + except AttributeError: + # data_point doesn't have attributes + record.attributes = {} + + # For Gauge, set the value directly + try: + record.value = data_point.value + except AttributeError: + # data_point doesn't have value + record.value = None + + return record + + def _group_by_attributes_and_timestamp(self, record: MetricRecord) -> Tuple[str, int]: + """Group metric record by attributes and timestamp. + + Args: + record: The metric record + + Returns: + A tuple key for grouping + """ + # Create a key for grouping based on attributes + attrs_key = self._get_attributes_key(record.attributes) + return (attrs_key, record.timestamp) + + def _create_emf_log( + self, metric_records: List[MetricRecord], resource: Resource, timestamp: Optional[int] = None + ) -> Dict: + """ + Create EMF log dictionary from metric records. + + Since metric_records is already grouped by attributes, this function + creates a single EMF log for all records. 
+ """ + # Start with base structure + emf_log = {"_aws": {"Timestamp": timestamp or int(time.time() * 1000), "CloudWatchMetrics": []}} + + # Set with latest EMF version schema + # opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/metric_translator.go#L414 + emf_log["Version"] = "1" + + # Add resource attributes to EMF log but not as dimensions + # OTel collector EMF Exporter has a resource_to_telemetry_conversion flag that will convert resource attributes + # as regular metric attributes(potential dimensions). However, for this SDK EMF implementation, + # we align with the OpenTelemetry concept that all metric attributes are treated as dimensions. + # And have resource attributes as just additional metadata in EMF, added otel.resource as prefix to distinguish. + if resource and resource.attributes: + for key, value in resource.attributes.items(): + emf_log[f"otel.resource.{key}"] = str(value) + + # Initialize collections for dimensions and metrics + metric_definitions = [] + # Collect attributes from all records (they should be the same for all records in the group) + # Only collect once from the first record and apply to all records + all_attributes = metric_records[0].attributes if metric_records and metric_records[0].attributes else {} + + # Process each metric record + for record in metric_records: + + metric_name = self._get_metric_name(record) + + # Skip processing if metric name is None or empty + if not metric_name: + continue + + # Skip processing if metric value is None or empty + if record.value is None: + logger.debug("Skipping metric %s as it does not have valid metric value", metric_name) + continue + + # Create metric data dict + metric_data = {"Name": metric_name} + + unit = self._get_unit(record) + if unit: + metric_data["Unit"] = unit + + # Add to metric definitions list + metric_definitions.append(metric_data) + + emf_log[metric_name] = record.value + + # Get dimension names from collected attributes + dimension_names = 
self._get_dimension_names(all_attributes) + + # Add attribute values to the root of the EMF log + for name, value in all_attributes.items(): + emf_log[name] = str(value) + + # Add the single dimension set to CloudWatch Metrics if we have dimensions and metrics + if dimension_names and metric_definitions: + emf_log["_aws"]["CloudWatchMetrics"].append( + {"Namespace": self.namespace, "Dimensions": [dimension_names], "Metrics": metric_definitions} + ) + + return emf_log + + # pylint: disable=no-member + def _send_log_event(self, log_event: Dict[str, Any]): + """ + Send a log event to CloudWatch Logs. + + Basic implementation for PR 1 - sends individual events directly. + + TODO: Batching events and follow CloudWatch Logs quota constraints - number of events & size limit per payload + """ + try: + # Send the log event + response = self.logs_client.put_log_events( + logGroupName=self.log_group_name, logStreamName=self.log_stream_name, logEvents=[log_event] + ) + + logger.debug("Successfully sent log event") + return response + + except ClientError as error: + logger.debug("Failed to send log event: %s", error) + raise + + # pylint: disable=too-many-nested-blocks + def export( + self, metrics_data: MetricsData, timeout_millis: Optional[int] = None, **kwargs: Any + ) -> MetricExportResult: + """ + Export metrics as EMF logs to CloudWatch. + + Groups metrics by attributes and timestamp before creating EMF logs. 
+ + Args: + metrics_data: MetricsData containing resource metrics and scope metrics + timeout_millis: Optional timeout in milliseconds + **kwargs: Additional keyword arguments + + Returns: + MetricExportResult indicating success or failure + """ + try: + if not metrics_data.resource_metrics: + return MetricExportResult.SUCCESS + + # Process all metrics from all resource metrics and scope metrics + for resource_metrics in metrics_data.resource_metrics: + for scope_metrics in resource_metrics.scope_metrics: + # Dictionary to group metrics by attributes and timestamp + grouped_metrics = defaultdict(list) + + # Process all metrics in this scope + for metric in scope_metrics.metrics: + # Skip if metric.data is None or no data_points exists + try: + if not (metric.data and metric.data.data_points): + continue + except AttributeError: + # Metric doesn't have data or data_points attribute + continue + + # Process metrics based on type + metric_type = type(metric.data) + if metric_type == Gauge: + for dp in metric.data.data_points: + record = self._convert_gauge(metric, dp) + grouped_metrics[self._group_by_attributes_and_timestamp(record)].append(record) + else: + logger.debug("Unsupported Metric Type: %s", metric_type) + + # Now process each group separately to create one EMF log per group + for (_, timestamp_ms), metric_records in grouped_metrics.items(): + if not metric_records: + continue + + # Create and send EMF log for this batch of metrics + self._send_log_event( + { + "message": json.dumps( + self._create_emf_log(metric_records, resource_metrics.resource, timestamp_ms) + ), + "timestamp": timestamp_ms, + } + ) + + return MetricExportResult.SUCCESS + # pylint: disable=broad-exception-caught + # capture all types of exceptions to not interrupt the instrumented services + except Exception as error: + logger.error("Failed to export metrics: %s", error) + return MetricExportResult.FAILURE + + def force_flush(self, timeout_millis: int = 10000) -> bool: + """ + Force 
flush any pending metrics. + + TODO: will add logic to handle graceful shutdown + + Args: + timeout_millis: Timeout in milliseconds + + Returns: + True if successful, False otherwise + """ + logger.debug("AwsCloudWatchEmfExporter force flushes the buffered metrics") + return True + + def shutdown(self, timeout_millis: Optional[int] = None, **kwargs: Any) -> bool: + """ + Shutdown the exporter. + Override to handle timeout and other keyword arguments, but do nothing. + + TODO: will add logic to handle graceful shutdown + + Args: + timeout_millis: Ignored timeout in milliseconds + **kwargs: Ignored additional keyword arguments + """ + # Intentionally do nothing + self.force_flush(timeout_millis) + logger.debug("AwsCloudWatchEmfExporter shutdown called with timeout_millis=%s", timeout_millis) + return True diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py index a25e55330..549154771 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py @@ -2,13 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import abc import inspect -import io -import json import logging -import math -from typing import Any, Dict, Optional - -from botocore.response import StreamingBody +from typing import Dict, Optional from amazon.opentelemetry.distro._aws_attribute_keys import ( AWS_BEDROCK_AGENT_ID, @@ -17,20 +12,11 @@ AWS_BEDROCK_GUARDRAIL_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID, ) -from amazon.opentelemetry.distro._aws_span_processing_util import ( - GEN_AI_REQUEST_MAX_TOKENS, - GEN_AI_REQUEST_MODEL, - GEN_AI_REQUEST_TEMPERATURE, - GEN_AI_REQUEST_TOP_P, - GEN_AI_RESPONSE_FINISH_REASONS, - GEN_AI_SYSTEM, - GEN_AI_USAGE_INPUT_TOKENS, - GEN_AI_USAGE_OUTPUT_TOKENS, -) from opentelemetry.instrumentation.botocore.extensions.types import ( 
_AttributeMapT, _AwsSdkCallContext, _AwsSdkExtension, + _BotocoreInstrumentorContext, _BotoResultT, ) from opentelemetry.trace.span import Span @@ -192,7 +178,7 @@ def extract_attributes(self, attributes: _AttributeMapT): if request_param_value: attributes[attribute_key] = request_param_value - def on_success(self, span: Span, result: _BotoResultT): + def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): if self._operation_class is None: return @@ -229,7 +215,7 @@ class _BedrockExtension(_AwsSdkExtension): """ # pylint: disable=no-self-use - def on_success(self, span: Span, result: _BotoResultT): + def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): # _GUARDRAIL_ID can only be retrieved from the response, not from the request guardrail_id = result.get(_GUARDRAIL_ID) if guardrail_id: @@ -244,205 +230,3 @@ def on_success(self, span: Span, result: _BotoResultT): AWS_BEDROCK_GUARDRAIL_ARN, guardrail_arn, ) - - -class _BedrockRuntimeExtension(_AwsSdkExtension): - """ - This class is an extension for - Amazon Bedrock Runtime. 
- """ - - def extract_attributes(self, attributes: _AttributeMapT): - attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM - - model_id = self._call_context.params.get(_MODEL_ID) - if model_id: - attributes[GEN_AI_REQUEST_MODEL] = model_id - - # Get the request body if it exists - body = self._call_context.params.get("body") - if body: - try: - request_body = json.loads(body) - - if "amazon.titan" in model_id: - self._extract_titan_attributes(attributes, request_body) - if "amazon.nova" in model_id: - self._extract_nova_attributes(attributes, request_body) - elif "anthropic.claude" in model_id: - self._extract_claude_attributes(attributes, request_body) - elif "meta.llama" in model_id: - self._extract_llama_attributes(attributes, request_body) - elif "cohere.command" in model_id: - self._extract_cohere_attributes(attributes, request_body) - elif "ai21.jamba" in model_id: - self._extract_ai21_attributes(attributes, request_body) - elif "mistral" in model_id: - self._extract_mistral_attributes(attributes, request_body) - - except json.JSONDecodeError: - _logger.debug("Error: Unable to parse the body as JSON") - - def _extract_titan_attributes(self, attributes, request_body): - config = request_body.get("textGenerationConfig", {}) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount")) - - def _extract_nova_attributes(self, attributes, request_body): - config = request_body.get("inferenceConfig", {}) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens")) - - def _extract_claude_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, 
GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_cohere_attributes(self, attributes, request_body): - prompt = request_body.get("message") - if prompt: - attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p")) - - def _extract_ai21_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_llama_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_mistral_attributes(self, attributes, request_body): - prompt = request_body.get("prompt") - if prompt: - attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - @staticmethod - def _set_if_not_none(attributes, key, value): - if value is not None: - attributes[key] = value - - # pylint: 
disable=too-many-branches - def on_success(self, span: Span, result: Dict[str, Any]): - model_id = self._call_context.params.get(_MODEL_ID) - - if not model_id: - return - - if "body" in result and isinstance(result["body"], StreamingBody): - original_body = None - try: - original_body = result["body"] - body_content = original_body.read() - - # Use one stream for telemetry - stream = io.BytesIO(body_content) - telemetry_content = stream.read() - response_body = json.loads(telemetry_content.decode("utf-8")) - if "amazon.titan" in model_id: - self._handle_amazon_titan_response(span, response_body) - if "amazon.nova" in model_id: - self._handle_amazon_nova_response(span, response_body) - elif "anthropic.claude" in model_id: - self._handle_anthropic_claude_response(span, response_body) - elif "meta.llama" in model_id: - self._handle_meta_llama_response(span, response_body) - elif "cohere.command" in model_id: - self._handle_cohere_command_response(span, response_body) - elif "ai21.jamba" in model_id: - self._handle_ai21_jamba_response(span, response_body) - elif "mistral" in model_id: - self._handle_mistral_mistral_response(span, response_body) - # Replenish stream for downstream application use - new_stream = io.BytesIO(body_content) - result["body"] = StreamingBody(new_stream, len(body_content)) - - except json.JSONDecodeError: - _logger.debug("Error: Unable to parse the response body as JSON") - except Exception as e: # pylint: disable=broad-exception-caught, invalid-name - _logger.debug("Error processing response: %s", e) - finally: - if original_body is not None: - original_body.close() - - # pylint: disable=no-self-use - def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]): - if "inputTextTokenCount" in response_body: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"]) - if "results" in response_body and response_body["results"]: - result = response_body["results"][0] - if "tokenCount" in result: - 
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"]) - if "completionReason" in result: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]]) - - # pylint: disable=no-self-use - def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "inputTokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"]) - if "outputTokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"]) - if "stopReason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]]) - - # pylint: disable=no-self-use - def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "input_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"]) - if "output_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"]) - if "stop_reason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]) - - # pylint: disable=no-self-use - def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]): - # Output tokens: Approximate from the response text - if "text" in response_body: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6)) - if "finish_reason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]]) - - # pylint: disable=no-self-use - def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "prompt_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"]) - if "completion_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, 
usage["completion_tokens"]) - if "choices" in response_body: - choices = response_body["choices"][0] - if "finish_reason" in choices: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]]) - - # pylint: disable=no-self-use - def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]): - if "prompt_token_count" in response_body: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"]) - if "generation_token_count" in response_body: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"]) - if "stop_reason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]) - - # pylint: disable=no-self-use - def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]): - if "outputs" in response_body: - outputs = response_body["outputs"][0] - if "text" in outputs: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6)) - if "stop_reason" in outputs: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]]) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py index 0f4a77d1e..ffc81b348 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py @@ -19,13 +19,17 @@ _BedrockAgentExtension, _BedrockAgentRuntimeExtension, _BedrockExtension, - _BedrockRuntimeExtension, ) from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS from opentelemetry.instrumentation.botocore.extensions.lmbd import _LambdaExtension from opentelemetry.instrumentation.botocore.extensions.sns import _SnsExtension from opentelemetry.instrumentation.botocore.extensions.sqs import _SqsExtension -from 
opentelemetry.instrumentation.botocore.extensions.types import _AttributeMapT, _AwsSdkExtension, _BotoResultT +from opentelemetry.instrumentation.botocore.extensions.types import ( + _AttributeMapT, + _AwsSdkExtension, + _BotocoreInstrumentorContext, + _BotoResultT, +) from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.span import Span @@ -75,8 +79,8 @@ def patch_extract_attributes(self, attributes: _AttributeMapT): old_on_success = _LambdaExtension.on_success - def patch_on_success(self, span: Span, result: _BotoResultT): - old_on_success(self, span, result) + def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): + old_on_success(self, span, result, instrumentor_context) lambda_configuration = result.get("Configuration", {}) function_arn = lambda_configuration.get("FunctionArn") if function_arn: @@ -180,8 +184,8 @@ def patch_extract_attributes(self, attributes: _AttributeMapT): old_on_success = _SqsExtension.on_success - def patch_on_success(self, span: Span, result: _BotoResultT): - old_on_success(self, span, result) + def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): + old_on_success(self, span, result, instrumentor_context) queue_url = result.get("QueueUrl") if queue_url: span.set_attribute(AWS_SQS_QUEUE_URL, queue_url) @@ -191,17 +195,17 @@ def patch_on_success(self, span: Span, result: _BotoResultT): def _apply_botocore_bedrock_patch() -> None: - """Botocore instrumentation patch for Bedrock, Bedrock Agent, Bedrock Runtime and Bedrock Agent Runtime + """Botocore instrumentation patch for Bedrock, Bedrock Agent, and Bedrock Agent Runtime This patch adds an extension to the upstream's list of known extension for Bedrock. Extensions allow for custom logic for adding service-specific information to spans, such as attributes. 
- Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys, - GEN_AI_REQUEST_MODEL and GEN_AI_SYSTEM attributes referenced in _aws_span_processing_util. + Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys. + Note: Bedrock Runtime uses the upstream extension directly. """ _KNOWN_EXTENSIONS["bedrock"] = _lazy_load(".", "_BedrockExtension") _KNOWN_EXTENSIONS["bedrock-agent"] = _lazy_load(".", "_BedrockAgentExtension") _KNOWN_EXTENSIONS["bedrock-agent-runtime"] = _lazy_load(".", "_BedrockAgentRuntimeExtension") - _KNOWN_EXTENSIONS["bedrock-runtime"] = _lazy_load(".", "_BedrockRuntimeExtension") + # bedrock-runtime is handled by upstream # The OpenTelemetry Authors code @@ -243,7 +247,7 @@ def extract_attributes(self, attributes: _AttributeMapT): attributes[AWS_SECRETSMANAGER_SECRET_ARN] = secret_id # pylint: disable=no-self-use - def on_success(self, span: Span, result: _BotoResultT): + def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): secret_arn = result.get("ARN") if secret_arn: span.set_attribute(AWS_SECRETSMANAGER_SECRET_ARN, secret_arn) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index e0c62b89d..85a6c8958 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -1,5 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 +from importlib.metadata import PackageNotFoundError from unittest import TestCase from unittest.mock import patch @@ -19,11 +20,12 @@ class TestAwsAuthSession(TestCase): - @patch("pkg_resources.get_distribution", side_effect=ImportError("test error")) - @patch.dict("sys.modules", {"botocore": None}, clear=False) + @patch("amazon.opentelemetry.distro._utils.version") + @patch.dict("sys.modules", {"botocore": None}) @patch("requests.Session.request", return_value=requests.Response()) - def test_aws_auth_session_no_botocore(self, _, __): + def test_aws_auth_session_no_botocore(self, mock_request, mock_version): """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed.""" + mock_version.side_effect = PackageNotFoundError("botocore") session = AwsAuthSession("us-east-1", "xray") actual_headers = {"test": "test"} @@ -61,3 +63,18 @@ def test_aws_auth_session(self, _, __): self.assertIn(AUTHORIZATION_HEADER, actual_headers) self.assertIn(X_AMZ_DATE_HEADER, actual_headers) self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + @patch("botocore.auth.SigV4Auth.add_auth", side_effect=Exception("Signing failed")) + def test_aws_auth_session_signing_error(self, mock_add_auth, mock_get_credentials, mock_request): + """Tests that aws_auth_session does not inject any SigV4 headers if signing errors.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git 
a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py similarity index 54% rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 1abf680f1..346b44291 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -22,55 +22,75 @@ def setUp(self): self.mock_exporter.export.return_value = LogExportResult.SUCCESS self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter) + self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE + self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE def test_process_log_data_nested_structure(self): """Tests that the processor correctly handles nested structures (dict/list)""" message_size = 400 - depth = 2 + message = "X" * message_size - nested_dict_log_body = self.generate_nested_log_body( - depth=depth, expected_body="X" * message_size, create_map=True + nest_dict_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=True ) - nested_array_log_body = self.generate_nested_log_body( - depth=depth, expected_body="X" * message_size, create_map=False + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=False ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=depth) - array_size = 
self.processor._get_any_value_size(val=nested_array_log_body, depth=depth) + expected_size = self.base_log_size + message_size * 2 - # Asserting almost equal to account for key lengths in the Log object body - self.assertAlmostEqual(dict_size, message_size, delta=20) - self.assertAlmostEqual(array_size, message_size, delta=20) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=2) + array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=2) + + # Asserting almost equal to account for dictionary keys in the Log object + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_nested_structure_exceeds_depth(self): - """Tests that the processor returns 0 for nested structure that exceeds the depth limit""" - message_size = 400 - log_body = "X" * message_size + """Tests that the processor cuts off calculation for nested structure that exceeds the depth limit""" + calculated = "X" * 400 + message = {"calculated": calculated, "truncated": {"truncated": {"test": "X" * self.max_log_size}}} - nested_dict_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=True) - nested_array_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=False) + # *2 since we set this message in both body and attributes + expected_size = self.base_log_size + (len("calculated") + len(calculated) + len("truncated")) * 2 + + nest_dict_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=True + ) + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=False + ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=3) - array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=3) + 
# Only calculates log size of up to depth of 4 + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=4) + array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=4) - self.assertEqual(dict_size, 0) - self.assertEqual(array_size, 0) + # Asserting almost equal to account for dictionary keys in the Log object body + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" - log_body = { - "smallKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 2), - "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1), + # Should stop calculation at bigKey + message = { + "bigKey": "X" * (self.max_log_size), + "smallKey": "X" * (self.max_log_size * 10), } - nested_dict_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=True) - nested_array_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=False) + expected_size = self.base_log_size + self.max_log_size + len("bigKey") + + nest_dict_log = self.generate_test_log_data( + log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=True + ) + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=False + ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body) - array_size = self.processor._get_any_value_size(val=nested_array_log_body) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0]) + array_size = self.processor._estimate_log_size(log=nest_array_log[0]) - self.assertAlmostEqual(dict_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) - self.assertAlmostEqual(array_size, 
self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_primitive(self): @@ -78,10 +98,18 @@ def test_process_log_data_primitive(self): expected_sizes = [4, 4, 1, 3, 4, 5, 0] for i in range(len(primitives)): - body = primitives[i] - expected_size = expected_sizes[i] + log = self.generate_test_log_data( + log_body=primitives[i], + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=1, + ) + + expected_size = self.base_log_size + expected_sizes[i] + actual_size = self.processor._estimate_log_size(log[0]) - actual_size = self.processor._get_any_value_size(body) self.assertEqual(actual_size, expected_size) @patch( @@ -94,11 +122,13 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): """Tests that export is only called once if a single batch is under the size limit""" log_count = 10 log_body = "test" - test_logs = self.generate_test_log_data(count=log_count, log_body=log_body) + test_logs = self.generate_test_log_data( + log_body=log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=log_count + ) total_data_size = 0 for log in test_logs: - size = self.processor._get_any_value_size(log.log_record.body) + size = self.processor._estimate_log_size(log) total_data_size += size self.processor._queue.appendleft(log) @@ -123,7 +153,9 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): But should only call set_gen_ai_log_flag if it's a Gen AI log event.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - non_gen_ai_test_logs = self.generate_test_log_data(count=3, log_body=large_log_body) + non_gen_ai_test_logs = self.generate_test_log_data( + log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=3 + ) gen_ai_test_logs = [] gen_ai_scopes = [ @@ -137,7 +169,13 @@ def 
test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): for gen_ai_scope in gen_ai_scopes: gen_ai_test_logs.extend( self.generate_test_log_data( - count=1, log_body=large_log_body, instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0") + log_body=large_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=3, + instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0"), ) ) @@ -165,18 +203,35 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): """Should make calls to export smaller sub-batch logs""" - large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + large_log_body = "X" * (self.max_log_size + 1) + small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) + gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0") - small_log_body = "X" * ( - int(self.processor._MAX_LOG_REQUEST_BYTE_SIZE / 10) - self.processor._BASE_LOG_BUFFER_BYTE_SIZE + + large_logs = self.generate_test_log_data( + log_body=large_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=3, + instrumentation_scope=gen_ai_scope, + ) + + small_logs = self.generate_test_log_data( + log_body=small_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=12, + instrumentation_scope=gen_ai_scope, ) - test_logs = self.generate_test_log_data(count=3, log_body=large_log_body, instrumentation_scope=gen_ai_scope) + # 1st, 2nd, 3rd batch = size 1 # 4th batch = size 10 # 5th batch = size 2 - small_logs = self.generate_test_log_data(count=12, log_body=small_log_body, instrumentation_scope=gen_ai_scope) - - test_logs.extend(small_logs) + test_logs = large_logs + small_logs for log in test_logs: self.processor._queue.appendleft(log) @@ 
-202,10 +257,29 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): expected_size = expected_sizes[i] self.assertEqual(len(batch), expected_size) + @staticmethod def generate_test_log_data( - self, log_body: AnyValue, count=5, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0") + log_body, + attr_key, + attr_val, + log_body_depth=3, + attr_depth=3, + count=5, + create_map=True, + instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"), ) -> List[LogData]: + + def generate_nested_value(depth, value, create_map=True) -> AnyValue: + if depth < 0: + return value + + if create_map: + return {"t": generate_nested_value(depth - 1, value, True)} + + return [generate_nested_value(depth - 1, value, False)] + logs = [] + for i in range(count): record = LogRecord( timestamp=int(time.time_ns()), @@ -214,23 +288,11 @@ def generate_test_log_data( trace_flags=TraceFlags(1), severity_text="INFO", severity_number=SeverityNumber.INFO, - body=log_body, - attributes={"test.attribute": f"value-{i + 1}"}, + body=generate_nested_value(log_body_depth, log_body, create_map), + attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) logs.append(log_data) return logs - - @staticmethod - def generate_nested_log_body(depth=0, expected_body: AnyValue = "test", create_map=True): - if depth < 0: - return expected_body - - if create_map: - return { - "key": TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map) - } - - return [TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py similarity index 88% rename from 
aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 9f6d84b32..82491bc01 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -8,9 +8,7 @@ from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from opentelemetry._logs.severity import SeverityNumber from opentelemetry.sdk._logs import LogData, LogRecord -from opentelemetry.sdk._logs.export import ( - LogExportResult, -) +from opentelemetry.sdk._logs.export import LogExportResult from opentelemetry.sdk.util.instrumentation import InstrumentationScope from opentelemetry.trace import TraceFlags @@ -38,7 +36,7 @@ def setUp(self): self.logs = self.generate_test_log_data() self.exporter = OTLPAwsLogExporter(endpoint=self._ENDPOINT) - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_export_success(self, mock_request): """Tests that the exporter always compresses the serialized logs with gzip before exporting.""" result = self.exporter.export(self.logs) @@ -56,7 +54,7 @@ def test_export_success(self, mock_request): self.assertTrue(len(data) >= 10) self.assertEqual(data[0:2], b"\x1f\x8b") - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_export_gen_ai_logs(self, mock_request): """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" @@ -74,7 +72,7 @@ def test_export_gen_ai_logs(self, mock_request): self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) 
self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_should_not_export_if_shutdown(self, mock_request): """Tests that no export request is made if the exporter is shutdown.""" self.exporter.shutdown() @@ -83,7 +81,7 @@ def test_should_not_export_if_shutdown(self, mock_request): mock_request.assert_not_called() self.assertEqual(result, LogExportResult.FAILURE) - @patch("requests.Session.request", return_value=non_retryable_response) + @patch("requests.Session.post", return_value=non_retryable_response) def test_should_not_export_again_if_not_retryable(self, mock_request): """Tests that only one export request is made if the response status code is non-retryable.""" result = self.exporter.export(self.logs) @@ -94,7 +92,7 @@ def test_should_not_export_again_if_not_retryable(self, mock_request): @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) - @patch("requests.Session.request", return_value=retryable_response_no_header) + @patch("requests.Session.post", return_value=retryable_response_no_header) def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 
But there is no Retry-After header.""" @@ -116,7 +114,7 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) @patch( - "requests.Session.request", + "requests.Session.post", side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], ) def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): @@ -136,7 +134,7 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) @patch( - "requests.Session.request", + "requests.Session.post", side_effect=[ retryable_response_bad_header, retryable_response_bad_header, @@ -159,6 +157,14 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after self.assertEqual(mock_request.call_count, 4) self.assertEqual(result, LogExportResult.SUCCESS) + @patch("requests.Session.post", side_effect=[requests.exceptions.ConnectionError(), good_response]) + def test_export_connection_error_retry(self, mock_request): + """Tests that the exporter retries on ConnectionError.""" + result = self.exporter.export(self.logs) + + self.assertEqual(mock_request.call_count, 2) + self.assertEqual(result, LogExportResult.SUCCESS) + def generate_test_log_data(self, count=5): logs = [] for i in range(count): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py new file mode 100644 index 000000000..3ea6031c3 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py @@ -0,0 
+1,547 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +import json +import time +import unittest +from unittest.mock import Mock, patch + +from botocore.exceptions import ClientError + +from amazon.opentelemetry.distro.exporter.otlp.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter +from opentelemetry.sdk.metrics.export import Gauge, MetricExportResult +from opentelemetry.sdk.resources import Resource + + +class MockDataPoint: + """Mock datapoint for testing.""" + + def __init__(self, value=10.0, attributes=None, time_unix_nano=None): + self.value = value + self.attributes = attributes or {} + self.time_unix_nano = time_unix_nano or int(time.time() * 1_000_000_000) + + +class MockMetric: + """Mock metric for testing.""" + + def __init__(self, name="test_metric", unit="1", description="Test metric"): + self.name = name + self.unit = unit + self.description = description + + +class MockGaugeData: + """Mock gauge data that passes isinstance checks.""" + + def __init__(self, data_points=None): + self.data_points = data_points or [] + + +class MockMetricWithData: + """Mock metric with data attribute.""" + + def __init__(self, name="test_metric", unit="1", description="Test metric", data=None): + self.name = name + self.unit = unit + self.description = description + self.data = data or MockGaugeData() + + +class MockResourceMetrics: + """Mock resource metrics for testing.""" + + def __init__(self, resource=None, scope_metrics=None): + self.resource = resource or Resource.create({"service.name": "test-service"}) + self.scope_metrics = scope_metrics or [] + + +class MockScopeMetrics: + """Mock scope metrics for testing.""" + + def __init__(self, scope=None, metrics=None): + self.scope = scope or Mock() + self.metrics = metrics or [] + + +# pylint: disable=too-many-public-methods +class TestAwsCloudWatchEmfExporter(unittest.TestCase): + """Test AwsCloudWatchEmfExporter class.""" + + def 
setUp(self): + """Set up test fixtures.""" + # Mock the botocore session to avoid AWS calls + with patch("botocore.session.Session") as mock_session: + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + self.exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + def test_initialization(self): + """Test exporter initialization.""" + self.assertEqual(self.exporter.namespace, "TestNamespace") + self.assertIsNotNone(self.exporter.log_stream_name) + self.assertEqual(self.exporter.log_group_name, "test-log-group") + + @patch("botocore.session.Session") + def test_initialization_with_custom_params(self, mock_session): + """Test exporter initialization with custom parameters.""" + # Mock the botocore session to avoid AWS calls + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + exporter = AwsCloudWatchEmfExporter( + namespace="CustomNamespace", + log_group_name="custom-log-group", + log_stream_name="custom-stream", + aws_region="us-west-2", + ) + self.assertEqual(exporter.namespace, "CustomNamespace") + self.assertEqual(exporter.log_group_name, "custom-log-group") + self.assertEqual(exporter.log_stream_name, "custom-stream") + + def test_get_unit_mapping(self): + """Test unit mapping functionality.""" + # Test known units from UNIT_MAPPING + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "ms", "test")), "Milliseconds" + ) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "s", "test")), "Seconds") + 
self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "us", "test")), "Microseconds" + ) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "By", "test")), "Bytes") + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "bit", "test")), "Bits") + + # Test units that map to empty string (should return empty string from mapping) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "1", "test")), "") + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "ns", "test")), "") + + # Test EMF supported units directly (should return as-is) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "Count", "test")), "Count") + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "Percent", "test")), "Percent" + ) + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "Kilobytes", "test")), "Kilobytes" + ) + + # Test unknown unit (not in mapping and not in supported units, returns None) + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", "unknown", "test"))) + + # Test empty unit (should return None due to falsy check) + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", "", "test"))) + + # Test None unit + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", None, "test"))) + + def test_get_metric_name(self): + """Test metric name extraction.""" + # Test with record that has name attribute + record = Mock() + record.name = "test_metric" + + result = self.exporter._get_metric_name(record) + self.assertEqual(result, "test_metric") + + # Test with record that has empty name (should return None) + record_empty = Mock() + record_empty.name = "" + + result_empty = self.exporter._get_metric_name(record_empty) + 
self.assertIsNone(result_empty) + + def test_get_dimension_names(self): + """Test dimension names extraction.""" + attributes = {"service.name": "test-service", "env": "prod", "region": "us-east-1"} + + result = self.exporter._get_dimension_names(attributes) + + # Should return all attribute keys + self.assertEqual(set(result), {"service.name", "env", "region"}) + + def test_get_attributes_key(self): + """Test attributes key generation.""" + attributes = {"service": "test", "env": "prod"} + + result = self.exporter._get_attributes_key(attributes) + + # Should be a string representation of sorted attributes + self.assertIsInstance(result, str) + self.assertIn("service", result) + self.assertIn("test", result) + self.assertIn("env", result) + self.assertIn("prod", result) + + def test_get_attributes_key_consistent(self): + """Test that attributes key generation is consistent.""" + # Same attributes in different order should produce same key + attrs1 = {"b": "2", "a": "1"} + attrs2 = {"a": "1", "b": "2"} + + key1 = self.exporter._get_attributes_key(attrs1) + key2 = self.exporter._get_attributes_key(attrs2) + + self.assertEqual(key1, key2) + + def test_group_by_attributes_and_timestamp(self): + """Test grouping by attributes and timestamp.""" + record = Mock() + record.attributes = {"env": "test"} + record.timestamp = 1234567890 + + result = self.exporter._group_by_attributes_and_timestamp(record) + + # Should return a tuple with attributes key and timestamp + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + self.assertEqual(result[1], 1234567890) + + def test_generate_log_stream_name(self): + """Test log stream name generation.""" + name1 = self.exporter._generate_log_stream_name() + name2 = self.exporter._generate_log_stream_name() + + # Should generate unique names + self.assertNotEqual(name1, name2) + self.assertTrue(name1.startswith("otel-python-")) + self.assertTrue(name2.startswith("otel-python-")) + + def test_normalize_timestamp(self): 
+ """Test timestamp normalization.""" + timestamp_ns = 1609459200000000000 # 2021-01-01 00:00:00 in nanoseconds + expected_ms = 1609459200000 # Same time in milliseconds + + result = self.exporter._normalize_timestamp(timestamp_ns) + self.assertEqual(result, expected_ms) + + def test_create_metric_record(self): + """Test metric record creation.""" + record = self.exporter._create_metric_record("test_metric", "Count", "Test description") + + self.assertIsNotNone(record) + self.assertEqual(record.name, "test_metric") + self.assertEqual(record.unit, "Count") + self.assertEqual(record.description, "Test description") + + def test_convert_gauge(self): + """Test gauge conversion.""" + metric = MockMetric("gauge_metric", "Count", "Gauge description") + dp = MockDataPoint(value=42.5, attributes={"key": "value"}) + + record = self.exporter._convert_gauge(metric, dp) + + self.assertIsNotNone(record) + self.assertEqual(record.name, "gauge_metric") + self.assertEqual(record.value, 42.5) + self.assertEqual(record.attributes, {"key": "value"}) + self.assertIsInstance(record.timestamp, int) + + def test_create_emf_log(self): + """Test EMF log creation.""" + # Create test records + gauge_record = self.exporter._create_metric_record("gauge_metric", "Count", "Gauge") + gauge_record.value = 50.0 + gauge_record.timestamp = int(time.time() * 1000) + gauge_record.attributes = {"env": "test"} + + records = [gauge_record] + resource = Resource.create({"service.name": "test-service"}) + + result = self.exporter._create_emf_log(records, resource) + + self.assertIsInstance(result, dict) + + # Check that the result is JSON serializable + json.dumps(result) # Should not raise exception + + @patch("botocore.session.Session") + def test_export_success(self, mock_session): + """Test successful export.""" + # Mock CloudWatch Logs client + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = 
mock_client + mock_client.put_log_events.return_value = {"nextSequenceToken": "12345"} + + # Create empty metrics data to test basic export flow + metrics_data = Mock() + metrics_data.resource_metrics = [] + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, MetricExportResult.SUCCESS) + + def test_export_failure(self): + """Test export failure handling.""" + # Create metrics data that will cause an exception during iteration + metrics_data = Mock() + # Make resource_metrics raise an exception when iterated over + metrics_data.resource_metrics = Mock() + metrics_data.resource_metrics.__iter__ = Mock(side_effect=Exception("Test exception")) + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, MetricExportResult.FAILURE) + + def test_force_flush_no_pending_events(self): + """Test force flush functionality with no pending events.""" + result = self.exporter.force_flush() + + self.assertTrue(result) + + @patch.object(AwsCloudWatchEmfExporter, "force_flush") + def test_shutdown(self, mock_force_flush): + """Test shutdown functionality.""" + mock_force_flush.return_value = True + + result = self.exporter.shutdown(timeout_millis=5000) + + self.assertTrue(result) + mock_force_flush.assert_called_once_with(5000) + + def test_send_log_event_method_exists(self): + """Test that _send_log_event method exists and can be called.""" + # Just test that the method exists and doesn't crash with basic input + log_event = {"message": "test message", "timestamp": 1234567890} + + # Mock the AWS client methods to avoid actual AWS calls + with patch.object(self.exporter.logs_client, "create_log_group"): + with patch.object(self.exporter.logs_client, "create_log_stream"): + with patch.object(self.exporter.logs_client, "put_log_events") as mock_put: + mock_put.return_value = {"nextSequenceToken": "12345"} + + # Should not raise an exception + try: + response = self.exporter._send_log_event(log_event) + # Response may be None or a dict, both are 
acceptable + self.assertTrue(response is None or isinstance(response, dict)) + except ClientError as error: + self.fail(f"_send_log_event raised an exception: {error}") + + def test_create_emf_log_with_resource(self): + """Test EMF log creation with resource attributes.""" + # Create test records + gauge_record = self.exporter._create_metric_record("gauge_metric", "Count", "Gauge") + gauge_record.value = 50.0 + gauge_record.timestamp = int(time.time() * 1000) + gauge_record.attributes = {"env": "test", "service": "api"} + + records = [gauge_record] + resource = Resource.create({"service.name": "test-service", "service.version": "1.0.0"}) + + result = self.exporter._create_emf_log(records, resource, 1234567890) + + # Verify EMF log structure + self.assertIn("_aws", result) + self.assertIn("CloudWatchMetrics", result["_aws"]) + self.assertEqual(result["_aws"]["Timestamp"], 1234567890) + self.assertEqual(result["Version"], "1") + + # Check resource attributes are prefixed + self.assertEqual(result["otel.resource.service.name"], "test-service") + self.assertEqual(result["otel.resource.service.version"], "1.0.0") + + # Check metric attributes + self.assertEqual(result["env"], "test") + self.assertEqual(result["service"], "api") + + # Check metric value + self.assertEqual(result["gauge_metric"], 50.0) + + # Check CloudWatch metrics structure + cw_metrics = result["_aws"]["CloudWatchMetrics"][0] + self.assertEqual(cw_metrics["Namespace"], "TestNamespace") + self.assertEqual(set(cw_metrics["Dimensions"][0]), {"env", "service"}) + self.assertEqual(cw_metrics["Metrics"][0]["Name"], "gauge_metric") + + @patch("botocore.session.Session") + def test_export_with_gauge_metrics(self, mock_session): + """Test exporting actual gauge metrics.""" + # Mock CloudWatch Logs client + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + 
mock_client.put_log_events.return_value = {"nextSequenceToken": "12345"} + mock_client.create_log_group.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, "CreateLogGroup" + ) + mock_client.create_log_stream.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, "CreateLogStream" + ) + + # Create mock metrics data + resource = Resource.create({"service.name": "test-service"}) + + # Create gauge data + gauge_data = Gauge(data_points=[MockDataPoint(value=42.0, attributes={"key": "value"})]) + + metric = MockMetricWithData(name="test_gauge", data=gauge_data) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, MetricExportResult.SUCCESS) + # Test validates that export works with gauge metrics + + def test_get_metric_name_fallback(self): + """Test metric name extraction fallback.""" + # Test with record that has no instrument attribute + record = Mock(spec=[]) + + result = self.exporter._get_metric_name(record) + self.assertIsNone(result) + + def test_get_metric_name_empty_name(self): + """Test metric name extraction with empty name.""" + # Test with record that has empty name + record = Mock() + record.name = "" + + result = self.exporter._get_metric_name(record) + self.assertIsNone(result) + + def test_create_emf_log_skips_empty_metric_names(self): + """Test that EMF log creation skips records with empty metric names.""" + # Create a record with no metric name + record_without_name = Mock() + record_without_name.attributes = {"key": "value"} + record_without_name.value = 10.0 + record_without_name.name = None # No valid name + + # Create a record with valid metric name + valid_record = self.exporter._create_metric_record("valid_metric", "Count", "Valid metric") 
+ valid_record.value = 20.0 + valid_record.attributes = {"key": "value"} + + records = [record_without_name, valid_record] + resource = Resource.create({"service.name": "test-service"}) + + result = self.exporter._create_emf_log(records, resource, 1234567890) + + # Only the valid record should be processed + self.assertIn("valid_metric", result) + self.assertEqual(result["valid_metric"], 20.0) + + # Check that only the valid metric is in the definitions (empty names are skipped) + cw_metrics = result["_aws"]["CloudWatchMetrics"][0] + self.assertEqual(len(cw_metrics["Metrics"]), 1) + # Ensure our valid metric is present + metric_names = [m["Name"] for m in cw_metrics["Metrics"]] + self.assertIn("valid_metric", metric_names) + + @patch("os.environ.get") + @patch("botocore.session.Session") + def test_initialization_with_env_region(self, mock_session, mock_env_get): + """Test initialization with AWS region from environment.""" + # Mock environment variable + mock_env_get.side_effect = lambda key: "us-west-1" if key == "AWS_REGION" else None + + # Mock the botocore session to avoid AWS calls + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + # Just verify the exporter was created successfully with region handling + self.assertIsNotNone(exporter) + self.assertEqual(exporter.namespace, "TestNamespace") + + @patch("botocore.session.Session") + def test_ensure_log_group_exists_create_failure(self, mock_session): + """Test log group creation failure.""" + # Mock the botocore session + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + 
+ # Make create fail with access denied error + mock_client.create_log_group.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "CreateLogGroup") + mock_client.create_log_stream.return_value = {} + + with self.assertRaises(ClientError): + AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + @patch("botocore.session.Session") + def test_ensure_log_group_exists_success(self, mock_session): + """Test log group existence check when log group already exists.""" + # Mock the botocore session + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + + # Make create fail with ResourceAlreadyExistsException (log group exists) + mock_client.create_log_group.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, "CreateLogGroup" + ) + mock_client.create_log_stream.return_value = {} + + # This should not raise an exception + exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + self.assertIsNotNone(exporter) + # Verify create was called once + mock_client.create_log_group.assert_called_once_with(logGroupName="test-log-group") + + def test_export_with_unsupported_metric_type(self): + """Test export with unsupported metric types.""" + # Create mock metrics data with unsupported metric type + resource = Resource.create({"service.name": "test-service"}) + + # Create non-gauge data + unsupported_data = Mock() + unsupported_data.data_points = [MockDataPoint(value=42.0)] + + metric = MockMetricWithData(name="test_counter", data=unsupported_data) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + # Should still return success even with unsupported metrics + result = 
self.exporter.export(metrics_data) + self.assertEqual(result, MetricExportResult.SUCCESS) + + def test_export_with_metric_without_data(self): + """Test export with metrics that don't have data attribute.""" + # Create mock metrics data + resource = Resource.create({"service.name": "test-service"}) + + # Create metric without data attribute + metric = Mock(spec=[]) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + # Should still return success + result = self.exporter.export(metrics_data) + self.assertEqual(result, MetricExportResult.SUCCESS) + + +if __name__ == "__main__": + unittest.main() diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_otlp_aws_span_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py similarity index 100% rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_otlp_aws_span_exporter.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py deleted file mode 100644 index e0c62b89d..000000000 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-# SPDX-License-Identifier: Apache-2.0 -from unittest import TestCase -from unittest.mock import patch - -import requests -from botocore.credentials import Credentials - -from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession - -AWS_OTLP_TRACES_ENDPOINT = "https://xray.us-east-1.amazonaws.com/v1/traces" -AWS_OTLP_LOGS_ENDPOINT = "https://logs.us-east-1.amazonaws.com/v1/logs" - -AUTHORIZATION_HEADER = "Authorization" -X_AMZ_DATE_HEADER = "X-Amz-Date" -X_AMZ_SECURITY_TOKEN_HEADER = "X-Amz-Security-Token" - -mock_credentials = Credentials(access_key="test_access_key", secret_key="test_secret_key", token="test_session_token") - - -class TestAwsAuthSession(TestCase): - @patch("pkg_resources.get_distribution", side_effect=ImportError("test error")) - @patch.dict("sys.modules", {"botocore": None}, clear=False) - @patch("requests.Session.request", return_value=requests.Response()) - def test_aws_auth_session_no_botocore(self, _, __): - """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - - @patch("requests.Session.request", return_value=requests.Response()) - @patch("botocore.session.Session.get_credentials", return_value=None) - def test_aws_auth_session_no_credentials(self, _, __): - """Tests that aws_auth_session will not inject SigV4 Headers if retrieving credentials returns None.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - 
self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - - @patch("requests.Session.request", return_value=requests.Response()) - @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) - def test_aws_auth_session(self, _, __): - """Tests that aws_auth_session will inject SigV4 Headers if botocore is installed.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertIn(AUTHORIZATION_HEADER, actual_headers) - self.assertIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py index d122519cf..f99b0d154 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py @@ -37,12 +37,12 @@ AWS_STEPFUNCTIONS_STATEMACHINE_ARN, ) from amazon.opentelemetry.distro._aws_metric_attribute_generator import _AwsMetricAttributeGenerator -from amazon.opentelemetry.distro._aws_span_processing_util import GEN_AI_REQUEST_MODEL from amazon.opentelemetry.distro.metric_attribute_generator import DEPENDENCY_METRIC, SERVICE_METRIC from opentelemetry.attributes import BoundedAttributes from opentelemetry.sdk.resources import _DEFAULT_RESOURCE, SERVICE_NAME from opentelemetry.sdk.trace import ReadableSpan, Resource from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL from opentelemetry.semconv.trace import MessagingOperationValues, SpanAttributes from 
opentelemetry.trace import SpanContext, SpanKind from opentelemetry.util.types import Attributes diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 13397a0d5..dbaee3c33 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -50,6 +50,7 @@ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.metrics import get_meter_provider from opentelemetry.processor.baggage import BaggageSpanProcessor from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG from opentelemetry.sdk.metrics._internal.export import PeriodicExportingMetricReader @@ -87,6 +88,22 @@ def setUpClass(cls): aws_otel_configurator.configure() cls.tracer_provider: TracerProvider = get_tracer_provider() + @classmethod + def tearDownClass(cls): + # Explicitly shut down meter provider to avoid I/O errors on Python 3.9 with gevent + # This ensures ConsoleMetricExporter is properly closed before Python cleanup + try: + meter_provider = get_meter_provider() + if hasattr(meter_provider, "force_flush"): + meter_provider.force_flush() + if hasattr(meter_provider, "shutdown"): + meter_provider.shutdown() + except (ValueError, RuntimeError): + # Ignore errors during cleanup: + # - ValueError: I/O operation on closed file (the exact error we're trying to prevent) + # - RuntimeError: Provider already shut down or threading issues + pass + def tearDown(self): os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", 
None) os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_RUNTIME_ENABLED", None) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py index b77e4fbf8..7368a04c8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py @@ -1,13 +1,12 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +from importlib.metadata import PackageNotFoundError, version from unittest import TestCase -from pkg_resources import DistributionNotFound, require - class TestAwsOpenTelemetryDistro(TestCase): def test_package_available(self): try: - require(["aws-opentelemetry-distro"]) - except DistributionNotFound: + version("aws-opentelemetry-distro") + except PackageNotFoundError: self.fail("aws-opentelemetry-distro not installed") diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py index 87e6c4810..8eff6f2e6 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py @@ -1,17 +1,15 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -import json -import math import os -from io import BytesIO +from importlib.metadata import PackageNotFoundError from typing import Any, Dict from unittest import TestCase from unittest.mock import MagicMock, patch import gevent.monkey -import pkg_resources -from botocore.response import StreamingBody +import opentelemetry.sdk.extension.aws.resource.ec2 as ec2_resource +import opentelemetry.sdk.extension.aws.resource.eks as eks_resource from amazon.opentelemetry.distro.patches._instrumentation_patch import ( AWS_GEVENT_PATCH_MODULES, apply_instrumentation_patches, @@ -38,7 +36,7 @@ _LAMBDA_SOURCE_MAPPING_ID: str = "lambdaEventSourceMappingID" # Patch names -GET_DISTRIBUTION_PATCH: str = "amazon.opentelemetry.distro._utils.pkg_resources.get_distribution" +IMPORTLIB_METADATA_VERSION_PATCH: str = "amazon.opentelemetry.distro._utils.version" class TestInstrumentationPatch(TestCase): @@ -60,7 +58,7 @@ class TestInstrumentationPatch(TestCase): def test_instrumentation_patch(self): # Set up method patches used by all tests - self.method_patches[GET_DISTRIBUTION_PATCH] = patch(GET_DISTRIBUTION_PATCH).start() + self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH] = patch(IMPORTLIB_METADATA_VERSION_PATCH).start() # Run tests that validate patch behaviour before and after patching self._run_patch_behaviour_tests() @@ -73,7 +71,7 @@ def test_instrumentation_patch(self): def _run_patch_behaviour_tests(self): # Test setup - self.method_patches[GET_DISTRIBUTION_PATCH].return_value = "CorrectDistributionObject" + self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH].return_value = "1.0.0" # Test setup to not patch gevent os.environ[AWS_GEVENT_PATCH_MODULES] = "none" @@ -120,6 +118,8 @@ def _run_patch_mechanism_tests(self): """ self._test_botocore_installed_flag() self._reset_mocks() + self._test_resource_detector_patches() + self._reset_mocks() def _test_unpatched_botocore_instrumentation(self): # Kinesis @@ -147,7 +147,7 @@ def 
_test_unpatched_botocore_instrumentation(self): ) # BedrockRuntime - self.assertFalse("bedrock-runtime" in _KNOWN_EXTENSIONS, "Upstream has added a bedrock-runtime extension") + self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS, "Upstream has added a bedrock-runtime extension") # SecretsManager self.assertFalse("secretsmanager" in _KNOWN_EXTENSIONS, "Upstream has added a SecretsManager extension") @@ -213,95 +213,9 @@ def _test_patched_botocore_instrumentation(self): bedrock_agent_runtime_sucess_attributes: Dict[str, str] = _do_on_success_bedrock("bedrock-agent-runtime") self.assertEqual(len(bedrock_agent_runtime_sucess_attributes), 0) - # BedrockRuntime - Amazon Titan + # BedrockRuntime self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS) - self._test_patched_bedrock_runtime_invoke_model( - model_id="amazon.titan-embed-text-v1", - max_tokens=512, - temperature=0.9, - top_p=0.75, - finish_reason="FINISH", - input_tokens=123, - output_tokens=456, - ) - - self._test_patched_bedrock_runtime_invoke_model( - model_id="amazon.nova-pro-v1:0", - max_tokens=500, - temperature=0.9, - top_p=0.7, - finish_reason="FINISH", - input_tokens=123, - output_tokens=456, - ) - - # BedrockRuntime - Anthropic Claude - self._test_patched_bedrock_runtime_invoke_model( - model_id="anthropic.claude-v2:1", - max_tokens=512, - temperature=0.5, - top_p=0.999, - finish_reason="end_turn", - input_tokens=23, - output_tokens=36, - ) - - # BedrockRuntime - Meta LLama - self._test_patched_bedrock_runtime_invoke_model( - model_id="meta.llama2-13b-chat-v1", - max_tokens=512, - temperature=0.5, - top_p=0.9, - finish_reason="stop", - input_tokens=31, - output_tokens=36, - ) - - # BedrockRuntime - Cohere Command-r - cohere_input = "Hello, world" - cohere_output = "Goodbye, world" - - self._test_patched_bedrock_runtime_invoke_model( - model_id="cohere.command-r-v1:0", - max_tokens=512, - temperature=0.5, - top_p=0.75, - finish_reason="COMPLETE", - input_tokens=math.ceil(len(cohere_input) / 6), - 
output_tokens=math.ceil(len(cohere_output) / 6), - input_prompt=cohere_input, - output_prompt=cohere_output, - ) - - # BedrockRuntime - AI21 Jambda - self._test_patched_bedrock_runtime_invoke_model( - model_id="ai21.jamba-1-5-large-v1:0", - max_tokens=512, - temperature=0.5, - top_p=0.999, - finish_reason="end_turn", - input_tokens=23, - output_tokens=36, - ) - - # BedrockRuntime - Mistral - msg = "Hello World" - mistral_input = f"[INST] {msg} [/INST]" - mistral_output = "Goodbye, World" - - self._test_patched_bedrock_runtime_invoke_model( - model_id="mistral.mistral-7b-instruct-v0:2", - max_tokens=512, - temperature=0.5, - top_p=0.9, - finish_reason="stop", - input_tokens=math.ceil(len(mistral_input) / 6), - output_tokens=math.ceil(len(mistral_output) / 6), - input_prompt=mistral_input, - output_prompt=mistral_output, - ) - # SecretsManager self.assertTrue("secretsmanager" in _KNOWN_EXTENSIONS) secretsmanager_attributes: Dict[str, str] = _do_extract_secretsmanager_attributes() @@ -369,17 +283,13 @@ def _test_botocore_installed_flag(self): with patch( "amazon.opentelemetry.distro.patches._botocore_patches._apply_botocore_instrumentation_patches" ) as mock_apply_patches: - get_distribution_patch: patch = self.method_patches[GET_DISTRIBUTION_PATCH] - get_distribution_patch.side_effect = pkg_resources.DistributionNotFound - apply_instrumentation_patches() - mock_apply_patches.assert_not_called() - - get_distribution_patch.side_effect = pkg_resources.VersionConflict("botocore==1.0.0", "botocore==0.0.1") + get_distribution_patch: patch = self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH] + get_distribution_patch.side_effect = PackageNotFoundError apply_instrumentation_patches() mock_apply_patches.assert_not_called() get_distribution_patch.side_effect = None - get_distribution_patch.return_value = "CorrectDistributionObject" + get_distribution_patch.return_value = "1.0.0" apply_instrumentation_patches() mock_apply_patches.assert_called() @@ -389,146 +299,6 @@ def 
_test_patched_bedrock_instrumentation(self): self.assertEqual(len(bedrock_sucess_attributes), 1) self.assertEqual(bedrock_sucess_attributes["aws.bedrock.guardrail.id"], _BEDROCK_GUARDRAIL_ID) - def _test_patched_bedrock_runtime_invoke_model(self, **args): - model_id = args.get("model_id", None) - max_tokens = args.get("max_tokens", None) - temperature = args.get("temperature", None) - top_p = args.get("top_p", None) - finish_reason = args.get("finish_reason", None) - input_tokens = args.get("input_tokens", None) - output_tokens = args.get("output_tokens", None) - input_prompt = args.get("input_prompt", None) - output_prompt = args.get("output_prompt", None) - - def get_model_response_request(): - request_body = {} - response_body = {} - - if "amazon.titan" in model_id: - request_body = { - "textGenerationConfig": { - "maxTokenCount": max_tokens, - "temperature": temperature, - "topP": top_p, - } - } - - response_body = { - "inputTextTokenCount": input_tokens, - "results": [ - { - "tokenCount": output_tokens, - "outputText": "testing", - "completionReason": finish_reason, - } - ], - } - - if "amazon.nova" in model_id: - request_body = { - "inferenceConfig": { - "max_new_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - } - - response_body = { - "output": {"message": {"content": [{"text": ""}], "role": "assistant"}}, - "stopReason": finish_reason, - "usage": {"inputTokens": input_tokens, "outputTokens": output_tokens}, - } - - if "anthropic.claude" in model_id: - request_body = { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "stop_reason": finish_reason, - "stop_sequence": None, - "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, - } - - if "ai21.jamba" in model_id: - request_body = { - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "choices": [{"finish_reason": 
finish_reason}], - "usage": { - "prompt_tokens": input_tokens, - "completion_tokens": output_tokens, - "total_tokens": (input_tokens + output_tokens), - }, - } - - if "meta.llama" in model_id: - request_body = { - "max_gen_len": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "prompt_token_count": input_tokens, - "generation_token_count": output_tokens, - "stop_reason": finish_reason, - } - - if "cohere.command" in model_id: - request_body = { - "message": input_prompt, - "max_tokens": max_tokens, - "temperature": temperature, - "p": top_p, - } - - response_body = { - "text": output_prompt, - "finish_reason": finish_reason, - } - - if "mistral" in model_id: - request_body = { - "prompt": input_prompt, - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = {"outputs": [{"text": output_prompt, "stop_reason": finish_reason}]} - - json_bytes = json.dumps(response_body).encode("utf-8") - - return json.dumps(request_body), StreamingBody(BytesIO(json_bytes), len(json_bytes)) - - request_body, response_body = get_model_response_request() - - bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock( - "bedrock-runtime", model_id=model_id, request_body=request_body - ) - bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock( - "bedrock-runtime", model_id=model_id, streaming_body=response_body - ) - - bedrock_runtime_attributes.update(bedrock_runtime_success_attributes) - - self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], model_id) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], max_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], temperature) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], top_p) - 
self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.input_tokens"], input_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.output_tokens"], output_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.response.finish_reasons"], [finish_reason]) - def _test_patched_bedrock_agent_instrumentation(self): """For bedrock-agent service, both extract_attributes and on_success provides attributes, the attributes depend on the API being invoked.""" @@ -586,6 +356,53 @@ def _test_patched_bedrock_agent_instrumentation(self): self.assertEqual(len(bedrock_agent_success_attributes), 1) self.assertEqual(bedrock_agent_success_attributes[attribute_tuple[0]], attribute_tuple[1]) + def _test_resource_detector_patches(self): + """Test that resource detector patches are applied and work correctly""" + # Test that the functions were patched + self.assertIsNotNone(ec2_resource._aws_http_request) + self.assertIsNotNone(eks_resource._aws_http_request) + + # Test EC2 patched function + with patch("amazon.opentelemetry.distro.patches._resource_detector_patches.urlopen") as mock_urlopen: + mock_response = MagicMock() + mock_response.read.return_value = b'{"test": "ec2-data"}' + mock_urlopen.return_value.__enter__.return_value = mock_response + + result = ec2_resource._aws_http_request("GET", "/test/path", {"X-Test": "header"}) + self.assertEqual(result, '{"test": "ec2-data"}') + + # Verify the request was made correctly + args, kwargs = mock_urlopen.call_args + request = args[0] + self.assertEqual(request.full_url, "http://169.254.169.254/test/path") + self.assertEqual(request.headers, {"X-test": "header"}) + self.assertEqual(kwargs["timeout"], 5) + + # Test EKS patched function + with patch("amazon.opentelemetry.distro.patches._resource_detector_patches.urlopen") as mock_urlopen, patch( + "amazon.opentelemetry.distro.patches._resource_detector_patches.ssl.create_default_context" + ) as mock_ssl: + mock_response = MagicMock() + mock_response.read.return_value = 
b'{"test": "eks-data"}' + mock_urlopen.return_value.__enter__.return_value = mock_response + + mock_context = MagicMock() + mock_ssl.return_value = mock_context + + result = eks_resource._aws_http_request("GET", "/api/v1/test", "Bearer token123") + self.assertEqual(result, '{"test": "eks-data"}') + + # Verify the request was made correctly + args, kwargs = mock_urlopen.call_args + request = args[0] + self.assertEqual(request.full_url, "https://kubernetes.default.svc/api/v1/test") + self.assertEqual(request.headers, {"Authorization": "Bearer token123"}) + self.assertEqual(kwargs["timeout"], 5) + self.assertEqual(kwargs["context"], mock_context) + + # Verify SSL context was created with correct CA file + mock_ssl.assert_called_once_with(cafile="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + def _reset_mocks(self): for method_patch in self.method_patches.values(): method_patch.reset_mock() @@ -678,6 +495,7 @@ def _do_on_success( ) -> Dict[str, str]: span_mock: Span = MagicMock() mock_call_context = MagicMock() + mock_instrumentor_context = MagicMock() span_attributes: Dict[str, str] = {} def set_side_effect(set_key, set_value): @@ -692,6 +510,6 @@ def set_side_effect(set_key, set_value): mock_call_context.params = params extension = _KNOWN_EXTENSIONS[service_name]()(mock_call_context) - extension.on_success(span_mock, result) + extension.on_success(span_mock, result, mock_instrumentor_context) return span_attributes diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py new file mode 100644 index 000000000..0839aec98 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py @@ -0,0 +1,96 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import os +from importlib.metadata import PackageNotFoundError +from unittest import TestCase +from unittest.mock import patch + +from amazon.opentelemetry.distro._utils import AGENT_OBSERVABILITY_ENABLED, is_agent_observability_enabled, is_installed + + +class TestUtils(TestCase): + def setUp(self): + # Store original env var if it exists + self.original_env = os.environ.get(AGENT_OBSERVABILITY_ENABLED) + + def tearDown(self): + # Restore original env var + if self.original_env is not None: + os.environ[AGENT_OBSERVABILITY_ENABLED] = self.original_env + elif AGENT_OBSERVABILITY_ENABLED in os.environ: + del os.environ[AGENT_OBSERVABILITY_ENABLED] + + def test_is_installed_package_not_found(self): + """Test is_installed returns False when package is not found""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Simulate package not found + mock_version.side_effect = PackageNotFoundError("test-package") + + result = is_installed("test-package>=1.0.0") + self.assertFalse(result) + + def test_is_installed(self): + """Test is_installed returns True when version matches the specifier""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Package is installed and version matches requirement + mock_version.return_value = "2.5.0" + + # Test with compatible version requirement + result = is_installed("test-package>=2.0.0") + self.assertTrue(result) + + # Test with exact version match + mock_version.return_value = "1.0.0" + result = is_installed("test-package==1.0.0") + self.assertTrue(result) + + # Test with version range + mock_version.return_value = "1.5.0" + result = is_installed("test-package>=1.0,<2.0") + self.assertTrue(result) + + def test_is_installed_version_mismatch(self): + """Test is_installed returns False when version doesn't match""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Package is installed but version doesn't match 
requirement + mock_version.return_value = "1.0.0" + + # Test with incompatible version requirement + result = is_installed("test-package>=2.0.0") + self.assertFalse(result) + + def test_is_agent_observability_enabled_various_values(self): + """Test is_agent_observability_enabled with various environment variable values""" + # Test with "True" (uppercase) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "True" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "TRUE" (all caps) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "TRUE" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "true" (lowercase) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "true" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "false" + os.environ[AGENT_OBSERVABILITY_ENABLED] = "false" + self.assertFalse(is_agent_observability_enabled()) + + # Test with "False" + os.environ[AGENT_OBSERVABILITY_ENABLED] = "False" + self.assertFalse(is_agent_observability_enabled()) + + # Test with arbitrary string + os.environ[AGENT_OBSERVABILITY_ENABLED] = "yes" + self.assertFalse(is_agent_observability_enabled()) + + # Test with empty string + os.environ[AGENT_OBSERVABILITY_ENABLED] = "" + self.assertFalse(is_agent_observability_enabled()) + + # Test when env var is not set + if AGENT_OBSERVABILITY_ENABLED in os.environ: + del os.environ[AGENT_OBSERVABILITY_ENABLED] + self.assertFalse(is_agent_observability_enabled()) diff --git a/contract-tests/images/applications/botocore/botocore_server.py b/contract-tests/images/applications/botocore/botocore_server.py index 6c315a4dc..80ecbc6fe 100644 --- a/contract-tests/images/applications/botocore/botocore_server.py +++ b/contract-tests/images/applications/botocore/botocore_server.py @@ -435,7 +435,7 @@ def get_model_request_response(path): "inferenceConfig": { "max_new_tokens": 800, "temperature": 0.9, - "top_p": 0.7, + "topP": 0.7, }, } @@ -496,32 +496,6 @@ def get_model_request_response(path): "text": 
"test-generation-text", } - if "ai21.jamba" in path: - model_id = "ai21.jamba-1-5-large-v1:0" - - request_body = { - "messages": [ - { - "role": "user", - "content": prompt, - }, - ], - "top_p": 0.8, - "temperature": 0.6, - "max_tokens": 512, - } - - response_body = { - "stop_reason": "end_turn", - "usage": { - "prompt_tokens": 21, - "completion_tokens": 24, - }, - "choices": [ - {"finish_reason": "stop"}, - ], - } - if "mistral" in path: model_id = "mistral.mistral-7b-instruct-v0:2" diff --git a/contract-tests/images/applications/botocore/requirements.txt b/contract-tests/images/applications/botocore/requirements.txt index 25113e3f4..61ddebf98 100644 --- a/contract-tests/images/applications/botocore/requirements.txt +++ b/contract-tests/images/applications/botocore/requirements.txt @@ -1,5 +1,3 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 botocore==1.34.143 boto3==1.34.143 diff --git a/contract-tests/images/applications/django/requirements.txt b/contract-tests/images/applications/django/requirements.txt index 9b54a7736..84dfdeabb 100644 --- a/contract-tests/images/applications/django/requirements.txt +++ b/contract-tests/images/applications/django/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 django==5.0.11 diff --git a/contract-tests/images/applications/mysql-connector/requirements.txt b/contract-tests/images/applications/mysql-connector/requirements.txt index 9ca44d2e4..f285dcb1f 100644 --- a/contract-tests/images/applications/mysql-connector/requirements.txt +++ b/contract-tests/images/applications/mysql-connector/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 mysql-connector-python~=9.1.0 diff --git a/contract-tests/images/applications/mysqlclient/requirements.txt 
b/contract-tests/images/applications/mysqlclient/requirements.txt index 49c6b70f3..933e606b4 100644 --- a/contract-tests/images/applications/mysqlclient/requirements.txt +++ b/contract-tests/images/applications/mysqlclient/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 mysqlclient==2.2.4 diff --git a/contract-tests/images/applications/psycopg2/requirements.txt b/contract-tests/images/applications/psycopg2/requirements.txt index f2d278475..8786aff35 100644 --- a/contract-tests/images/applications/psycopg2/requirements.txt +++ b/contract-tests/images/applications/psycopg2/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 psycopg2==2.9.9 diff --git a/contract-tests/images/applications/pymysql/requirements.txt b/contract-tests/images/applications/pymysql/requirements.txt index ddda9b1fe..8ba76defb 100644 --- a/contract-tests/images/applications/pymysql/requirements.txt +++ b/contract-tests/images/applications/pymysql/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 pymysql==1.1.1 diff --git a/contract-tests/images/applications/requests/requirements.txt b/contract-tests/images/applications/requests/requirements.txt index 369049d22..700b31404 100644 --- a/contract-tests/images/applications/requests/requirements.txt +++ b/contract-tests/images/applications/requests/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 requests~=2.0 diff --git a/contract-tests/images/mock-collector/pyproject.toml b/contract-tests/images/mock-collector/pyproject.toml index 422e2a5b1..42e13c868 100644 --- a/contract-tests/images/mock-collector/pyproject.toml +++ b/contract-tests/images/mock-collector/pyproject.toml @@ -11,9 +11,9 @@ requires-python = 
">=3.9" dependencies = [ "grpcio ~= 1.66.0", - "opentelemetry-proto==1.25.0", - "opentelemetry-sdk==1.25.0", - "protobuf==4.25.2", + "opentelemetry-proto==1.33.1", + "opentelemetry-sdk==1.33.1", + "protobuf==5.26.1", "typing-extensions==4.12.2" ] diff --git a/contract-tests/images/mock-collector/requirements.txt b/contract-tests/images/mock-collector/requirements.txt index a0c5454cd..12e69148b 100644 --- a/contract-tests/images/mock-collector/requirements.txt +++ b/contract-tests/images/mock-collector/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.66.2 -opentelemetry-proto==1.25.0 -opentelemetry-sdk==1.25.0 -protobuf==4.25.2 +opentelemetry-proto==1.33.1 +opentelemetry-sdk==1.33.1 +protobuf==5.26.1 typing-extensions==4.12.2 diff --git a/contract-tests/tests/pyproject.toml b/contract-tests/tests/pyproject.toml index 0df6f6a1c..5c2895fab 100644 --- a/contract-tests/tests/pyproject.toml +++ b/contract-tests/tests/pyproject.toml @@ -10,8 +10,8 @@ license = "Apache-2.0" requires-python = ">=3.9" dependencies = [ - "opentelemetry-proto==1.25.0", - "opentelemetry-sdk==1.25.0", + "opentelemetry-proto==1.33.1", + "opentelemetry-sdk==1.33.1", "testcontainers==3.7.1", "grpcio==1.66.2", "docker==7.1.0", diff --git a/contract-tests/tests/test/amazon/botocore/botocore_test.py b/contract-tests/tests/test/amazon/botocore/botocore_test.py index ed04c9514..549ec3f50 100644 --- a/contract-tests/tests/test/amazon/botocore/botocore_test.py +++ b/contract-tests/tests/test/amazon/botocore/botocore_test.py @@ -440,7 +440,7 @@ def test_bedrock_runtime_invoke_model_amazon_titan(self): _GEN_AI_USAGE_INPUT_TOKENS: 15, _GEN_AI_USAGE_OUTPUT_TOKENS: 13, }, - span_name="Bedrock Runtime.InvokeModel", + span_name="text_completion amazon.titan-text-premier-v1:0", ) def test_bedrock_runtime_invoke_model_amazon_nova(self): @@ -458,6 +458,7 @@ def test_bedrock_runtime_invoke_model_amazon_nova(self): cloudformation_primary_identifier="amazon.nova-pro-v1:0", request_specific_attributes={ 
_GEN_AI_REQUEST_MODEL: "amazon.nova-pro-v1:0", + _GEN_AI_SYSTEM: "aws.bedrock", _GEN_AI_REQUEST_MAX_TOKENS: 800, _GEN_AI_REQUEST_TEMPERATURE: 0.9, _GEN_AI_REQUEST_TOP_P: 0.7, @@ -467,7 +468,7 @@ def test_bedrock_runtime_invoke_model_amazon_nova(self): _GEN_AI_USAGE_INPUT_TOKENS: 432, _GEN_AI_USAGE_OUTPUT_TOKENS: 681, }, - span_name="Bedrock Runtime.InvokeModel", + span_name="chat amazon.nova-pro-v1:0", ) def test_bedrock_runtime_invoke_model_anthropic_claude(self): @@ -495,7 +496,7 @@ def test_bedrock_runtime_invoke_model_anthropic_claude(self): _GEN_AI_USAGE_INPUT_TOKENS: 15, _GEN_AI_USAGE_OUTPUT_TOKENS: 13, }, - span_name="Bedrock Runtime.InvokeModel", + span_name="chat anthropic.claude-v2:1", ) def test_bedrock_runtime_invoke_model_meta_llama(self): @@ -523,7 +524,7 @@ def test_bedrock_runtime_invoke_model_meta_llama(self): _GEN_AI_USAGE_INPUT_TOKENS: 31, _GEN_AI_USAGE_OUTPUT_TOKENS: 49, }, - span_name="Bedrock Runtime.InvokeModel", + span_name="chat meta.llama2-13b-chat-v1", ) def test_bedrock_runtime_invoke_model_cohere_command(self): @@ -553,35 +554,7 @@ def test_bedrock_runtime_invoke_model_cohere_command(self): ), _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6), }, - span_name="Bedrock Runtime.InvokeModel", - ) - - def test_bedrock_runtime_invoke_model_ai21_jamba(self): - self.do_test_requests( - "bedrock/invokemodel/invoke-model/ai21.jamba-1-5-large-v1:0", - "GET", - 200, - 0, - 0, - rpc_service="Bedrock Runtime", - remote_service="AWS::BedrockRuntime", - remote_operation="InvokeModel", - remote_resource_type="AWS::Bedrock::Model", - remote_resource_identifier="ai21.jamba-1-5-large-v1:0", - cloudformation_primary_identifier="ai21.jamba-1-5-large-v1:0", - request_specific_attributes={ - _GEN_AI_REQUEST_MODEL: "ai21.jamba-1-5-large-v1:0", - _GEN_AI_SYSTEM: "aws.bedrock", - _GEN_AI_REQUEST_MAX_TOKENS: 512, - _GEN_AI_REQUEST_TEMPERATURE: 0.6, - _GEN_AI_REQUEST_TOP_P: 0.8, - }, - response_specific_attributes={ - 
_GEN_AI_RESPONSE_FINISH_REASONS: ["stop"], - _GEN_AI_USAGE_INPUT_TOKENS: 21, - _GEN_AI_USAGE_OUTPUT_TOKENS: 24, - }, - span_name="Bedrock Runtime.InvokeModel", + span_name="chat cohere.command-r-v1:0", ) def test_bedrock_runtime_invoke_model_mistral(self): @@ -611,7 +584,7 @@ def test_bedrock_runtime_invoke_model_mistral(self): ), _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-output-text") / 6), }, - span_name="Bedrock Runtime.InvokeModel", + span_name="chat mistral.mistral-7b-instruct-v0:2", ) def test_bedrock_get_guardrail(self): From b75fe99545d4ab092c66ef09f534f26c6c1d1644 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:19:50 +0000 Subject: [PATCH 03/17] linting fix --- .../otlp/aws/logs/test_aws_batch_log_record_processor.py | 2 ++ .../exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 346b44291..5f61f40f6 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -1,3 +1,5 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 import time import unittest from typing import List diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 82491bc01..ad0a1ddca 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -1,3 +1,5 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 import time from unittest import TestCase from unittest.mock import patch From d588605ed5b68504df2cd9a4a84d86718ee3bb0a Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:26:01 +0000 Subject: [PATCH 04/17] linting fix --- .../logs/aws_batch_log_record_processor.py | 4 +-- .../test_aws_batch_log_record_processor.py | 16 ++++++------ .../aws/logs/test_otlp_aws_logs_exporter.py | 25 ++++++++++--------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index e57b03f3f..f12c9330f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -87,7 +87,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: self._exporter.export(batch) except Exception as exception: # pylint: disable=broad-exception-caught - _logger.exception("Exception while exporting logs: " + str(exception)) + 
_logger.exception("Exception while exporting logs: %s", exception) detach(token) def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: @@ -145,7 +145,7 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: new_queue.append((content, current_depth + 1)) else: _logger.debug( - f"Max log depth of {depth} exceeded. Log data size will not be accurately calculated." + "Max log depth of %s exceeded. Log data size will not be accurately calculated.", depth ) queue = new_queue diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 5f61f40f6..8e639606c 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -99,9 +99,9 @@ def test_process_log_data_primitive(self): primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None] expected_sizes = [4, 4, 1, 3, 4, 5, 0] - for i in range(len(primitives)): + for index, primitive in enumerate(primitives): log = self.generate_test_log_data( - log_body=primitives[i], + log_body=primitive, attr_key="", attr_val="", log_body_depth=-1, @@ -109,7 +109,7 @@ def test_process_log_data_primitive(self): count=1, ) - expected_size = self.base_log_size + expected_sizes[i] + expected_size = self.base_log_size + expected_sizes[index] actual_size = self.processor._estimate_log_size(log[0]) self.assertEqual(actual_size, expected_size) @@ -254,9 +254,9 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): 4: 2, # 5th batch (index 4) should have 2 logs } - for i, call in enumerate(batches): + for index, call in enumerate(batches): batch = call[0][0] - expected_size = 
expected_sizes[i] + expected_size = expected_sizes[index] self.assertEqual(len(batch), expected_size) @staticmethod @@ -282,11 +282,11 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue: logs = [] - for i in range(count): + for index in range(count): record = LogRecord( timestamp=int(time.time_ns()), - trace_id=int(f"0x{i + 1:032x}", 16), - span_id=int(f"0x{i + 1:016x}", 16), + trace_id=int(f"0x{index + 1:032x}", 16), + span_id=int(f"0x{index + 1:016x}", 16), trace_flags=TraceFlags(1), severity_text="INFO", severity_number=SeverityNumber.INFO, diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index ad0a1ddca..5c4646612 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -105,8 +105,8 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header delays = mock_sleep.call_args_list - for i in range(len(delays)): - self.assertEqual(delays[i][0][0], 2**i) + for i, delay in enumerate(delays): + self.assertEqual(delay[0][0], 2**i) # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) self.assertEqual(mock_request.call_count, 7) @@ -125,8 +125,8 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head result = self.exporter.export(self.logs) delays = mock_sleep.call_args_list - for i in range(len(delays)): - self.assertEqual(delays[i][0][0], 10) + for delay in delays: + self.assertEqual(delay[0][0], 10) self.assertEqual(mock_sleep.call_count, 3) self.assertEqual(mock_request.call_count, 4) @@ -152,8 +152,8 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after result = 
self.exporter.export(self.logs) delays = mock_sleep.call_args_list - for i in range(len(delays)): - self.assertEqual(delays[i][0][0], 2**i) + for index, delay in enumerate(delays): + self.assertEqual(delay[0][0], 2**index) self.assertEqual(mock_sleep.call_count, 3) self.assertEqual(mock_request.call_count, 4) @@ -167,18 +167,19 @@ def test_export_connection_error_retry(self, mock_request): self.assertEqual(mock_request.call_count, 2) self.assertEqual(result, LogExportResult.SUCCESS) - def generate_test_log_data(self, count=5): + @staticmethod + def generate_test_log_data(count=5): logs = [] - for i in range(count): + for index in range(count): record = LogRecord( timestamp=int(time.time_ns()), - trace_id=int(f"0x{i + 1:032x}", 16), - span_id=int(f"0x{i + 1:016x}", 16), + trace_id=int(f"0x{index + 1:032x}", 16), + span_id=int(f"0x{index + 1:016x}", 16), trace_flags=TraceFlags(1), severity_text="INFO", severity_number=SeverityNumber.INFO, - body=f"Test log {i + 1}", - attributes={"test.attribute": f"value-{i + 1}"}, + body=f"Test log {index + 1}", + attributes={"test.attribute": f"value-{index + 1}"}, ) log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) From c78aca5eeff097b9b662d4a3cde9b0f29e907aad Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:33:02 +0000 Subject: [PATCH 05/17] linting fix --- .../metrics/aws_cloudwatch_emf_exporter.py | 523 ------------------ .../aws/logs/test_otlp_aws_logs_exporter.py | 4 +- .../test_aws_cloudwatch_emf_exporter.py | 2 +- 3 files changed, 3 insertions(+), 526 deletions(-) delete mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py deleted file mode 
100644 index e2e364b03..000000000 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py +++ /dev/null @@ -1,523 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -# pylint: disable=no-self-use - -import json -import logging -import time -import uuid -from collections import defaultdict -from typing import Any, Dict, List, Optional, Tuple - -import botocore.session -from botocore.exceptions import ClientError - -from opentelemetry.sdk.metrics import ( - Counter, - Histogram, - ObservableCounter, - ObservableGauge, - ObservableUpDownCounter, - UpDownCounter, -) -from opentelemetry.sdk.metrics._internal.point import Metric -from opentelemetry.sdk.metrics.export import ( - AggregationTemporality, - Gauge, - MetricExporter, - MetricExportResult, - MetricsData, - NumberDataPoint, -) -from opentelemetry.sdk.resources import Resource -from opentelemetry.util.types import Attributes - -logger = logging.getLogger(__name__) - - -class MetricRecord: - """The metric data unified representation of all OTel metrics for OTel to CW EMF conversion.""" - - def __init__(self, metric_name: str, metric_unit: str, metric_description: str): - """ - Initialize metric record. 
- - Args: - metric_name: Name of the metric - metric_unit: Unit of the metric - metric_description: Description of the metric - """ - # Instrument metadata - self.name = metric_name - self.unit = metric_unit - self.description = metric_description - - # Will be set by conversion methods - self.timestamp: Optional[int] = None - self.attributes: Attributes = {} - - # Different metric type data - only one will be set per record - self.value: Optional[float] = None - self.sum_data: Optional[Any] = None - self.histogram_data: Optional[Any] = None - self.exp_histogram_data: Optional[Any] = None - - -class AwsCloudWatchEmfExporter(MetricExporter): - """ - OpenTelemetry metrics exporter for CloudWatch EMF format. - - This exporter converts OTel metrics into CloudWatch EMF logs which are then - sent to CloudWatch Logs. CloudWatch Logs automatically extracts the metrics - from the EMF logs. - - https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html - - """ - - # CloudWatch EMF supported units - # Ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html - EMF_SUPPORTED_UNITS = { - "Seconds", - "Microseconds", - "Milliseconds", - "Bytes", - "Kilobytes", - "Megabytes", - "Gigabytes", - "Terabytes", - "Bits", - "Kilobits", - "Megabits", - "Gigabits", - "Terabits", - "Percent", - "Count", - "Bytes/Second", - "Kilobytes/Second", - "Megabytes/Second", - "Gigabytes/Second", - "Terabytes/Second", - "Bits/Second", - "Kilobits/Second", - "Megabits/Second", - "Gigabits/Second", - "Terabits/Second", - "Count/Second", - "None", - } - - # OTel to CloudWatch unit mapping - # Ref: opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/grouped_metric.go#L188 - UNIT_MAPPING = { - "1": "", - "ns": "", - "ms": "Milliseconds", - "s": "Seconds", - "us": "Microseconds", - "By": "Bytes", - "bit": "Bits", - } - - def __init__( - self, - namespace: str = "default", - log_group_name: str = None, - 
log_stream_name: Optional[str] = None, - aws_region: Optional[str] = None, - preferred_temporality: Optional[Dict[type, AggregationTemporality]] = None, - **kwargs, - ): - """ - Initialize the CloudWatch EMF exporter. - - Args: - namespace: CloudWatch namespace for metrics - log_group_name: CloudWatch log group name - log_stream_name: CloudWatch log stream name (auto-generated if None) - aws_region: AWS region (auto-detected if None) - preferred_temporality: Optional dictionary mapping instrument types to aggregation temporality - **kwargs: Additional arguments passed to botocore client - """ - # Set up temporality preference default to DELTA if customers not set - if preferred_temporality is None: - preferred_temporality = { - Counter: AggregationTemporality.DELTA, - Histogram: AggregationTemporality.DELTA, - ObservableCounter: AggregationTemporality.DELTA, - ObservableGauge: AggregationTemporality.DELTA, - ObservableUpDownCounter: AggregationTemporality.DELTA, - UpDownCounter: AggregationTemporality.DELTA, - } - - super().__init__(preferred_temporality) - - self.namespace = namespace - self.log_group_name = log_group_name - self.log_stream_name = log_stream_name or self._generate_log_stream_name() - - session = botocore.session.Session() - self.logs_client = session.create_client("logs", region_name=aws_region, **kwargs) - - # Ensure log group exists - self._ensure_log_group_exists() - - # Ensure log stream exists - self._ensure_log_stream_exists() - - # Default to unique log stream name matching OTel Collector - # EMF Exporter behavior with language for source identification - def _generate_log_stream_name(self) -> str: - """Generate a unique log stream name.""" - - unique_id = str(uuid.uuid4())[:8] - return f"otel-python-{unique_id}" - - def _ensure_log_group_exists(self): - """Ensure the log group exists, create if it doesn't.""" - try: - self.logs_client.create_log_group(logGroupName=self.log_group_name) - logger.info("Created log group: %s", 
self.log_group_name) - except ClientError as error: - if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": - logger.debug("Log group %s already exists", self.log_group_name) - else: - logger.error("Failed to create log group %s : %s", self.log_group_name, error) - raise - - def _ensure_log_stream_exists(self): - try: - self.logs_client.create_log_stream(logGroupName=self.log_group_name, logStreamName=self.log_stream_name) - logger.info("Created log stream: %s", self.log_stream_name) - except ClientError as error: - if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": - logger.debug("Log stream %s already exists", self.log_stream_name) - else: - logger.error("Failed to create log stream %s : %s", self.log_group_name, error) - raise - - def _get_metric_name(self, record: MetricRecord) -> Optional[str]: - """Get the metric name from the metric record or data point.""" - - try: - if record.name: - return record.name - except AttributeError: - pass - # Return None if no valid metric name found - return None - - def _get_unit(self, record: MetricRecord) -> Optional[str]: - """Get CloudWatch unit from MetricRecord unit.""" - unit = record.unit - - if not unit: - return None - - # First check if unit is already a supported EMF unit - if unit in self.EMF_SUPPORTED_UNITS: - return unit - - # Map from OTel unit to CloudWatch unit - mapped_unit = self.UNIT_MAPPING.get(unit) - - return mapped_unit - - def _get_dimension_names(self, attributes: Attributes) -> List[str]: - """Extract dimension names from attributes.""" - # Implement dimension selection logic - # For now, use all attributes as dimensions - return list(attributes.keys()) - - def _get_attributes_key(self, attributes: Attributes) -> str: - """ - Create a hashable key from attributes for grouping metrics. 
- - Args: - attributes: The attributes dictionary - - Returns: - A string representation of sorted attributes key-value pairs - """ - # Sort the attributes to ensure consistent keys - sorted_attrs = sorted(attributes.items()) - # Create a string representation of the attributes - return str(sorted_attrs) - - def _normalize_timestamp(self, timestamp_ns: int) -> int: - """ - Normalize a nanosecond timestamp to milliseconds for CloudWatch. - - Args: - timestamp_ns: Timestamp in nanoseconds - - Returns: - Timestamp in milliseconds - """ - # Convert from nanoseconds to milliseconds - return timestamp_ns // 1_000_000 - - def _create_metric_record(self, metric_name: str, metric_unit: str, metric_description: str) -> MetricRecord: - """ - Creates the intermediate metric data structure that standardizes different otel metric representation - and will be used to generate EMF events. The base record - establishes the instrument schema (name/unit/description) that will be populated - with dimensions, timestamps, and values during metric processing. - - Args: - metric_name: Name of the metric - metric_unit: Unit of the metric - metric_description: Description of the metric - - Returns: - A MetricRecord object - """ - return MetricRecord(metric_name, metric_unit, metric_description) - - def _convert_gauge(self, metric: Metric, data_point: NumberDataPoint) -> MetricRecord: - """Convert a Gauge metric datapoint to a metric record. 
- - Args: - metric: The metric object - data_point: The datapoint to convert - - Returns: - MetricRecord with populated timestamp, attributes, and value - """ - # Create base record - record = self._create_metric_record(metric.name, metric.unit, metric.description) - - # Set timestamp - try: - timestamp_ms = ( - self._normalize_timestamp(data_point.time_unix_nano) - if data_point.time_unix_nano is not None - else int(time.time() * 1000) - ) - except AttributeError: - # data_point doesn't have time_unix_nano attribute - timestamp_ms = int(time.time() * 1000) - record.timestamp = timestamp_ms - - # Set attributes - try: - record.attributes = data_point.attributes - except AttributeError: - # data_point doesn't have attributes - record.attributes = {} - - # For Gauge, set the value directly - try: - record.value = data_point.value - except AttributeError: - # data_point doesn't have value - record.value = None - - return record - - def _group_by_attributes_and_timestamp(self, record: MetricRecord) -> Tuple[str, int]: - """Group metric record by attributes and timestamp. - - Args: - record: The metric record - - Returns: - A tuple key for grouping - """ - # Create a key for grouping based on attributes - attrs_key = self._get_attributes_key(record.attributes) - return (attrs_key, record.timestamp) - - def _create_emf_log( - self, metric_records: List[MetricRecord], resource: Resource, timestamp: Optional[int] = None - ) -> Dict: - """ - Create EMF log dictionary from metric records. - - Since metric_records is already grouped by attributes, this function - creates a single EMF log for all records. 
- """ - # Start with base structure - emf_log = {"_aws": {"Timestamp": timestamp or int(time.time() * 1000), "CloudWatchMetrics": []}} - - # Set with latest EMF version schema - # opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/metric_translator.go#L414 - emf_log["Version"] = "1" - - # Add resource attributes to EMF log but not as dimensions - # OTel collector EMF Exporter has a resource_to_telemetry_conversion flag that will convert resource attributes - # as regular metric attributes(potential dimensions). However, for this SDK EMF implementation, - # we align with the OpenTelemetry concept that all metric attributes are treated as dimensions. - # And have resource attributes as just additional metadata in EMF, added otel.resource as prefix to distinguish. - if resource and resource.attributes: - for key, value in resource.attributes.items(): - emf_log[f"otel.resource.{key}"] = str(value) - - # Initialize collections for dimensions and metrics - metric_definitions = [] - # Collect attributes from all records (they should be the same for all records in the group) - # Only collect once from the first record and apply to all records - all_attributes = metric_records[0].attributes if metric_records and metric_records[0].attributes else {} - - # Process each metric record - for record in metric_records: - - metric_name = self._get_metric_name(record) - - # Skip processing if metric name is None or empty - if not metric_name: - continue - - # Skip processing if metric value is None or empty - if record.value is None: - logger.debug("Skipping metric %s as it does not have valid metric value", metric_name) - continue - - # Create metric data dict - metric_data = {"Name": metric_name} - - unit = self._get_unit(record) - if unit: - metric_data["Unit"] = unit - - # Add to metric definitions list - metric_definitions.append(metric_data) - - emf_log[metric_name] = record.value - - # Get dimension names from collected attributes - dimension_names = 
self._get_dimension_names(all_attributes) - - # Add attribute values to the root of the EMF log - for name, value in all_attributes.items(): - emf_log[name] = str(value) - - # Add the single dimension set to CloudWatch Metrics if we have dimensions and metrics - if dimension_names and metric_definitions: - emf_log["_aws"]["CloudWatchMetrics"].append( - {"Namespace": self.namespace, "Dimensions": [dimension_names], "Metrics": metric_definitions} - ) - - return emf_log - - # pylint: disable=no-member - def _send_log_event(self, log_event: Dict[str, Any]): - """ - Send a log event to CloudWatch Logs. - - Basic implementation for PR 1 - sends individual events directly. - - TODO: Batching event and follow CloudWatch Logs quato constraints - number of events & size limit per payload - """ - try: - # Send the log event - response = self.logs_client.put_log_events( - logGroupName=self.log_group_name, logStreamName=self.log_stream_name, logEvents=[log_event] - ) - - logger.debug("Successfully sent log event") - return response - - except ClientError as error: - logger.debug("Failed to send log event: %s", error) - raise - - # pylint: disable=too-many-nested-blocks - def export( - self, metrics_data: MetricsData, timeout_millis: Optional[int] = None, **kwargs: Any - ) -> MetricExportResult: - """ - Export metrics as EMF logs to CloudWatch. - - Groups metrics by attributes and timestamp before creating EMF logs. 
- - Args: - metrics_data: MetricsData containing resource metrics and scope metrics - timeout_millis: Optional timeout in milliseconds - **kwargs: Additional keyword arguments - - Returns: - MetricExportResult indicating success or failure - """ - try: - if not metrics_data.resource_metrics: - return MetricExportResult.SUCCESS - - # Process all metrics from all resource metrics and scope metrics - for resource_metrics in metrics_data.resource_metrics: - for scope_metrics in resource_metrics.scope_metrics: - # Dictionary to group metrics by attributes and timestamp - grouped_metrics = defaultdict(list) - - # Process all metrics in this scope - for metric in scope_metrics.metrics: - # Skip if metric.data is None or no data_points exists - try: - if not (metric.data and metric.data.data_points): - continue - except AttributeError: - # Metric doesn't have data or data_points attribute - continue - - # Process metrics based on type - metric_type = type(metric.data) - if metric_type == Gauge: - for dp in metric.data.data_points: - record = self._convert_gauge(metric, dp) - grouped_metrics[self._group_by_attributes_and_timestamp(record)].append(record) - else: - logger.debug("Unsupported Metric Type: %s", metric_type) - - # Now process each group separately to create one EMF log per group - for (_, timestamp_ms), metric_records in grouped_metrics.items(): - if not metric_records: - continue - - # Create and send EMF log for this batch of metrics - self._send_log_event( - { - "message": json.dumps( - self._create_emf_log(metric_records, resource_metrics.resource, timestamp_ms) - ), - "timestamp": timestamp_ms, - } - ) - - return MetricExportResult.SUCCESS - # pylint: disable=broad-exception-caught - # capture all types of exceptions to not interrupt the instrumented services - except Exception as error: - logger.error("Failed to export metrics: %s", error) - return MetricExportResult.FAILURE - - def force_flush(self, timeout_millis: int = 10000) -> bool: - """ - Force 
flush any pending metrics. - - TODO: will add logic to handle gracefule shutdown - - Args: - timeout_millis: Timeout in milliseconds - - Returns: - True if successful, False otherwise - """ - logger.debug("AwsCloudWatchEmfExporter force flushes the buffered metrics") - return True - - def shutdown(self, timeout_millis: Optional[int] = None, **kwargs: Any) -> bool: - """ - Shutdown the exporter. - Override to handle timeout and other keyword arguments, but do nothing. - - TODO: will add logic to handle gracefule shutdown - - Args: - timeout_millis: Ignored timeout in milliseconds - **kwargs: Ignored additional keyword arguments - """ - # Intentionally do nothing - self.force_flush(timeout_millis) - logger.debug("AwsCloudWatchEmfExporter shutdown called with timeout_millis=%s", timeout_millis) - return True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 5c4646612..93f507916 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -105,8 +105,8 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header delays = mock_sleep.call_args_list - for i, delay in enumerate(delays): - self.assertEqual(delay[0][0], 2**i) + for index, delay in enumerate(delays): + self.assertEqual(delay[0][0], 2**index) # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) self.assertEqual(mock_request.call_count, 7) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py index 
3ea6031c3..01d500c70 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py @@ -8,7 +8,7 @@ from botocore.exceptions import ClientError -from amazon.opentelemetry.distro.exporter.otlp.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter +from amazon.opentelemetry.distro.exporter.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter from opentelemetry.sdk.metrics.export import Gauge, MetricExportResult from opentelemetry.sdk.resources import Resource From 12eca32e9a730ec01af0481760ee7ee668199916 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:35:00 +0000 Subject: [PATCH 06/17] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index f12c9330f..40a8dad84 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -42,7 +42,7 @@ def __init__( self._exporter = exporter # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 - def _export(self, batch_strategy: BatchLogExportStrategy) -> None: + def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: disable=too-many-nested-blocks """ Preserves existing batching behavior but will intermediarly export small log batches if the size of the data in the batch is at 
orabove AWS CloudWatch's maximum request size limit of 1 MB. From 83ec370c7650c5a49e1ec736b0cdef3ae313e398 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:36:49 +0000 Subject: [PATCH 07/17] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 40a8dad84..066217498 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -42,7 +42,7 @@ def __init__( self._exporter = exporter # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 - def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: disable=too-many-nested-blocks + def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ Preserves existing batching behavior but will intermediarly export small log batches if the size of the data in the batch is at orabove AWS CloudWatch's maximum request size limit of 1 MB. 
@@ -66,7 +66,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: di log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): + if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # pylint: disable=too-many-nested-blocks # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: if self._is_gen_ai_log(batch[0]): From 79bbf464d4c123c9bb02d19f86f6b9f2adf9046b Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:39:14 +0000 Subject: [PATCH 08/17] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 066217498..3e77e710f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -66,7 +66,9 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # pylint: disable=too-many-nested-blocks + if batch and ( + batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE + ): # pylint: disable=too-many-nested-blocks # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: if self._is_gen_ai_log(batch[0]): From b6e1b97fff4ea01c892192d9ebbcd0328e766835 Mon Sep 17 00:00:00 2001 From: liustve Date: 
Mon, 23 Jun 2025 22:27:17 +0000 Subject: [PATCH 09/17] remove gen ai handling logic --- .../logs/aws_batch_log_record_processor.py | 42 +++------- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 79 +++---------------- .../test_aws_batch_log_record_processor.py | 42 ++-------- .../aws/logs/test_otlp_aws_logs_exporter.py | 18 ----- 4 files changed, 28 insertions(+), 153 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 3e77e710f..737463cf5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -19,7 +19,8 @@ class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) _MAX_LOG_REQUEST_BYTE_SIZE = ( - 1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html + 1048576 # Maximum uncompressed/unserialized bytes / request - + # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html ) def __init__( @@ -41,11 +42,15 @@ def __init__( self._exporter = exporter - # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ + + Explictily overrides upstream _export method to add AWS CloudWatch size-based batching + See: + https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 + Preserves existing batching behavior but will 
intermediarly export small log batches if - the size of the data in the batch is at orabove AWS CloudWatch's maximum request size limit of 1 MB. + the size of the data in the batch is at or above AWS CloudWatch's maximum request size limit of 1 MB. - Data size of exported batches will ALWAYS be <= 1 MB except for the case below: - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1 @@ -66,14 +71,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and ( - batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE - ): # pylint: disable=too-many-nested-blocks - # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 - if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: - if self._is_gen_ai_log(batch[0]): - self._exporter.set_gen_ai_log_flag() - + if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): self._exporter.export(batch) batch_data_size = 0 batch = [] @@ -82,11 +80,6 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: batch.append(log_data) if batch: - # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 - if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: - if self._is_gen_ai_log(batch[0]): - self._exporter.set_gen_ai_log_flag() - self._exporter.export(batch) except Exception as exception: # pylint: disable=broad-exception-caught _logger.exception("Exception while exporting logs: %s", exception) @@ -97,7 +90,7 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. Will process complex log structures up to the specified depth limit. 
- If the depth limit of the log structure is exceeded, returns truncates calculation + If the depth limit of the log structure is exceeded, returns the truncated calculation to everything up to that point. Args: @@ -153,18 +146,3 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: queue = new_queue return size - - @staticmethod - def _is_gen_ai_log(log: LogData) -> bool: - """ - Is the log a Gen AI log event? - """ - gen_ai_instrumentations = { - "openinference.instrumentation.langchain", - "openinference.instrumentation.crewai", - "opentelemetry.instrumentation.langchain", - "crewai.telemetry", - "openlit.otel.tracing", - } - - return log.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 9bd75d03f..26ec07849 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -8,6 +8,7 @@ from typing import Dict, Optional, Sequence import requests +from requests.exceptions import ConnectionError from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator @@ -21,43 +22,8 @@ class OTLPAwsLogExporter(OTLPLogExporter): - """ - Below is the protobuf-JSON formatted path to "content" and "role" for the - following GenAI Consolidated Log Event Schema: - - "body": { - "output": { - "messages": [ - { - "content": "hi", - "role": "assistant" - } - ] - }, - "input": { - "messages": [ - { - "content": "hello", - "role": "user" - } - ] - } - } - - """ - - _LARGE_GEN_AI_LOG_PATH_HEADER = ( - 
"\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" # body - "['kvlistValue']['values'][*]['value']" # body['output'], body['input'] - "['kvlistValue']['values'][0]['value']" # body['output']['messages'], body['input']['messages'] - "['arrayValue']['values'][*]" # body['output']['messages'][0..999], body['input']['messages'][0..999] - "['kvlistValue']['values'][*]['value']['stringValue']" # body['output']['messages'][0..999]['content'/'role'], - # body['input']['messages'][0..999]['content'/'role'] - ) - - _LARGE_LOG_HEADER = "x-aws-truncatable-fields" - - _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + + _RETRY_AFTER_HEADER = "Retry-After" # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling def __init__( self, @@ -86,24 +52,17 @@ def __init__( session=AwsAuthSession(aws_region=self._aws_region, service="logs"), ) - # https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 def export(self, batch: Sequence[LogData]) -> LogExportResult: """ - Exports the given batch of OTLP log data. - Behaviors of how this export will work - - - 1. Always compresses the serialized data into gzip before sending. + Exports log batch with AWS-specific enhancements over the base OTLPLogExporter. - 2. If self._gen_ai_log_flag is enabled, the log data is > 1 MB a - and the assumption is that the log is a normalized gen.ai LogEvent. - - inject the {LARGE_LOG_HEADER} into the header. + Based on upstream implementation which does not retry based on Retry-After header: + https://github.com/open-telemetry/opentelemetry-python/blob/acae2c232b101d3e447a82a7161355d66aa06fa2/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 - 3. 
Retry behavior is now the following: - - if the response contains a status code that is retryable and the response contains Retry-After in its - headers, the serialized data will be exported after that set delay - - - if the response does not contain that Retry-After header, default back to the current iteration of the - exponential backoff delay + Key behaviors: + 1. Always compresses data with gzip before sending + 2. Adds truncatable fields header for large Gen AI logs (>1MB) + 3. Implements Retry-After header support for throttling responses """ if self._shutdown: @@ -111,11 +70,9 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: return LogExportResult.FAILURE serialized_data = encode_logs(batch).SerializeToString() - gzip_data = BytesIO() with gzip.GzipFile(fileobj=gzip_data, mode="w") as gzip_stream: gzip_stream.write(serialized_data) - data = gzip_data.getvalue() backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) @@ -132,10 +89,9 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp.status_code, resp.text, ) - self._gen_ai_log_flag = False return LogExportResult.FAILURE - # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) # Set the next retry delay to the value of the Retry-After response in the headers. @@ -154,7 +110,6 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: "Logs will not be exported.", resp.reason, ) - self._gen_ai_log_flag = False return LogExportResult.FAILURE _logger.warning( @@ -165,28 +120,19 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: sleep(delay) - def set_gen_ai_log_flag(self): - """ - Sets a flag that indicates the current log batch contains - a generative AI log record that exceeds the CloudWatch Logs size limit (1MB). 
- """ - self._gen_ai_log_flag = True - def _send(self, serialized_data: bytes): try: response = self._session.post( url=self._endpoint, - headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, data=serialized_data, verify=self._certificate_file, timeout=self._timeout, cert=self._client_cert, ) return response - except requests.exceptions.ConnectionError: + except ConnectionError: response = self._session.post( url=self._endpoint, - headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, data=serialized_data, verify=self._certificate_file, timeout=self._timeout, @@ -199,6 +145,7 @@ def _retryable(resp: requests.Response) -> bool: """ Is it a retryable response? """ + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 8e639606c..365dd1c08 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -142,7 +142,6 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): self.assertEqual(len(self.processor._queue), 0) self.assertEqual(len(actual_batch), log_count) self.mock_exporter.export.assert_called_once() - self.mock_exporter.set_gen_ai_log_flag.assert_not_called() @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", @@ -151,37 +150,12 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): 
@patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): - """Should make multiple export calls of batch size 1 to export logs of size > 1 MB. - But should only call set_gen_ai_log_flag if it's a Gen AI log event.""" + """Should make multiple export calls of batch size 1 to export logs of size > 1 MB.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - non_gen_ai_test_logs = self.generate_test_log_data( - log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=3 + test_logs = self.generate_test_log_data( + log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=15 ) - gen_ai_test_logs = [] - - gen_ai_scopes = [ - "openinference.instrumentation.langchain", - "openinference.instrumentation.crewai", - "opentelemetry.instrumentation.langchain", - "crewai.telemetry", - "openlit.otel.tracing", - ] - - for gen_ai_scope in gen_ai_scopes: - gen_ai_test_logs.extend( - self.generate_test_log_data( - log_body=large_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=3, - instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0"), - ) - ) - - test_logs = gen_ai_test_logs + non_gen_ai_test_logs for log in test_logs: self.processor._queue.appendleft(log) @@ -189,8 +163,7 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) self.assertEqual(len(self.processor._queue), 0) - self.assertEqual(self.mock_exporter.export.call_count, 3 + len(gen_ai_test_logs)) - self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, len(gen_ai_test_logs)) + self.assertEqual(self.mock_exporter.export.call_count, len(test_logs)) batches = 
self.mock_exporter.export.call_args_list @@ -208,8 +181,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): large_log_body = "X" * (self.max_log_size + 1) small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) - gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0") - large_logs = self.generate_test_log_data( log_body=large_log_body, attr_key="", @@ -217,7 +188,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): log_body_depth=-1, attr_depth=-1, count=3, - instrumentation_scope=gen_ai_scope, ) small_logs = self.generate_test_log_data( @@ -227,7 +197,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): log_body_depth=-1, attr_depth=-1, count=12, - instrumentation_scope=gen_ai_scope, ) # 1st, 2nd, 3rd batch = size 1 @@ -242,7 +211,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): self.assertEqual(len(self.processor._queue), 0) self.assertEqual(self.mock_exporter.export.call_count, 5) - self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, 3) batches = self.mock_exporter.export.call_args_list @@ -294,7 +262,7 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue: attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) - log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) + log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) logs.append(log_data) return logs diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 93f507916..31e401643 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ 
b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -56,24 +56,6 @@ def test_export_success(self, mock_request): self.assertTrue(len(data) >= 10) self.assertEqual(data[0:2], b"\x1f\x8b") - @patch("requests.Session.post", return_value=good_response) - def test_export_gen_ai_logs(self, mock_request): - """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" - - self.exporter.set_gen_ai_log_flag() - - result = self.exporter.export(self.logs) - - mock_request.assert_called_once() - - _, kwargs = mock_request.call_args - headers = kwargs.get("headers", None) - - self.assertEqual(result, LogExportResult.SUCCESS) - self.assertIsNotNone(headers) - self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) - self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) - @patch("requests.Session.post", return_value=good_response) def test_should_not_export_if_shutdown(self, mock_request): """Tests that no export request is made if the exporter is shutdown.""" From 17d0f90864818c8206575e150d5a3d7b5a908155 Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 23 Jun 2025 22:28:51 +0000 Subject: [PATCH 10/17] fixed linting --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 737463cf5..1fe961e33 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -45,7 +45,7 @@ def __init__( def _export(self, batch_strategy: 
BatchLogExportStrategy) -> None: """ - Explictily overrides upstream _export method to add AWS CloudWatch size-based batching + Explicitly overrides upstream _export method to add AWS CloudWatch size-based batching See: https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 From 3d12858775fd4d3ebed6924fb0d41b64f53ea257 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:13:31 +0000 Subject: [PATCH 11/17] refactor _init_logging to 1.33.1 version --- .../distro/aws_opentelemetry_configurator.py | 57 +++++++++++-------- .../logs/aws_batch_log_record_processor.py | 1 - .../otlp/aws/logs/otlp_aws_logs_exporter.py | 9 ++- 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index e39c916c5..f62ed77da 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -1,9 +1,9 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. 
+import logging import os import re -from logging import NOTSET, Logger, getLogger from typing import ClassVar, Dict, List, Optional, Type, Union from importlib_metadata import version @@ -29,6 +29,7 @@ from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader from amazon.opentelemetry.distro.scope_based_filtering_view import ScopeBasedRetainingView +from opentelemetry._events import set_event_logger_provider from opentelemetry._logs import get_logger_provider, set_logger_provider from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter @@ -43,7 +44,9 @@ _import_id_generator, _import_sampler, _OTelSDKConfigurator, + _patch_basic_config, ) +from opentelemetry.sdk._events import EventLoggerProvider from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, LogExporter from opentelemetry.sdk.environment_variables import ( @@ -133,7 +136,7 @@ def _configure(self, **kwargs): # The OpenTelemetry Authors code # Long term, we wish to contribute this to upstream to improve initialization customizability and reduce dependency on # internal logic. 
-def _initialize_components(): +def _initialize_components(setup_logging_handler: bool | None = None): trace_exporters, metric_exporters, log_exporters = _import_exporters( _get_exporter_names("traces"), _get_exporter_names("metrics"), @@ -170,38 +173,37 @@ def _initialize_components(): resource=resource, ) _init_metrics(metric_exporters, resource) - logging_enabled = os.getenv(_OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "false") - if logging_enabled.strip().lower() == "true": - _init_logging(log_exporters, resource) + + if setup_logging_handler is None: + setup_logging_handler = ( + os.getenv(_OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "false").strip().lower() == "true" + ) + _init_logging(log_exporters, resource, setup_logging_handler) def _init_logging( - exporters: Dict[str, Type[LogExporter]], - resource: Resource = None, + exporters: dict[str, Type[LogExporter]], + resource: Resource | None = None, + setup_logging_handler: bool = True, ): - - # Provides a default OTLP log exporter when it's not set. - # This is the behavior for the logs exporters for other languages. 
- logs_exporters = os.environ.get("OTEL_LOGS_EXPORTER") - - if not exporters and logs_exporters and logs_exporters.lower() != "none": - exporters = {"otlp": OTLPLogExporter} - provider = LoggerProvider(resource=resource) set_logger_provider(provider) for _, exporter_class in exporters.items(): - exporter_args: Dict[str, any] = {} - log_exporter = _customize_logs_exporter(exporter_class(**exporter_args), resource) + exporter_args = {} + log_exporter: LogExporter = _customize_logs_exporter(exporter_class(**exporter_args)) + log_processor = _customize_log_record_processor(log_exporter) + provider.add_log_record_processor(log_processor) - if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - provider.add_log_record_processor(AwsBatchLogRecordProcessor(exporter=log_exporter)) - else: - provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) + event_logger_provider = EventLoggerProvider(logger_provider=provider) + set_event_logger_provider(event_logger_provider) - handler = LoggingHandler(level=NOTSET, logger_provider=provider) + if setup_logging_handler: + _patch_basic_config() - getLogger().addHandler(handler) + # Add OTel handler + handler = LoggingHandler(level=logging.NOTSET, logger_provider=provider) + logging.getLogger().addHandler(handler) def _init_tracing( @@ -390,7 +392,14 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> return AwsMetricAttributesSpanExporterBuilder(span_exporter, resource).build() -def _customize_logs_exporter(log_exporter: LogExporter, resource: Resource) -> LogExporter: +def _customize_log_record_processor(log_exporter: LogExporter): + if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): + return AwsBatchLogRecordProcessor(exporter=log_exporter) + + return BatchLogRecordProcessor(exporter=log_exporter) + + +def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: logs_endpoint = 
os.environ.get(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT) if _is_aws_otlp_endpoint(logs_endpoint, "logs"): diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 1fe961e33..08a99ced5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -44,7 +44,6 @@ def __init__( def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ - Explicitly overrides upstream _export method to add AWS CloudWatch size-based batching See: https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 26ec07849..fd9830d9a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -7,7 +7,7 @@ from time import sleep from typing import Dict, Optional, Sequence -import requests +from requests import Response from requests.exceptions import ConnectionError from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession @@ -34,7 +34,6 @@ def __init__( headers: Optional[Dict[str, str]] = None, timeout: Optional[int] = None, ): - self._gen_ai_log_flag = False self._aws_region = None if endpoint: @@ -77,6 +76,10 @@ def export(self, batch: Sequence[LogData]) -> 
LogExportResult: backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + # This loop will eventually exit via one of three conditions: + # 1. Successful response (resp.ok) + # 2. Non-retryable error (4xx status codes except 429) + # 3. Retry exponential backoff timeout exhausted and no Retry-After header available while True: resp = self._send(data) @@ -141,7 +144,7 @@ def _send(self, serialized_data: bytes): return response @staticmethod - def _retryable(resp: requests.Response) -> bool: + def _retryable(resp: Response) -> bool: """ Is it a retryable response? """ From 7f90bc79de823b562d6176606fc1e2d9f37fdd33 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:53:45 +0000 Subject: [PATCH 12/17] refactored batch log record processor --- .../distro/aws_opentelemetry_configurator.py | 6 +- .../logs/aws_batch_log_record_processor.py | 15 ++++- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 66 ++++++++++--------- .../test_aws_batch_log_record_processor.py | 4 +- 4 files changed, 54 insertions(+), 37 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f62ed77da..aa181de43 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -22,7 +22,7 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsBatchLogRecordProcessor +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsCloudWatchOtlpBatchLogRecordProcessor from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import 
OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter @@ -106,7 +106,7 @@ # UDP package size is not larger than 64KB LAMBDA_SPAN_EXPORT_BATCH_SIZE = 10 -_logger: Logger = getLogger(__name__) +_logger: logging.Logger = logging.getLogger(__name__) class AwsOpenTelemetryConfigurator(_OTelSDKConfigurator): @@ -394,7 +394,7 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> def _customize_log_record_processor(log_exporter: LogExporter): if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - return AwsBatchLogRecordProcessor(exporter=log_exporter) + return AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter) return BatchLogRecordProcessor(exporter=log_exporter) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 08a99ced5..dc81875bc 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -14,7 +14,20 @@ _logger = logging.getLogger(__name__) -class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): +class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor): + """ + Custom implementation of BatchLogRecordProcessor that manages log record batching + with size-based constraints to prevent exceeding AWS CloudWatch Logs OTLP endpoint request size limits. 
+ + This processor still exports all logs up to _max_export_batch_size but rather than doing exactly + one export, we will estimate log sizes and do multiple batch exports + where each exported batch will have an additonal constraint: + + If the batch to be exported will have a data size of > 1 MB: + The batch will be split into multiple exports of sub-batches of data size <= 1 MB. + + A unique case is if the sub-batch is of data size > 1 MB, then the sub-batch will have exactly 1 log in it. + """ _BASE_LOG_BUFFER_BYTE_SIZE = ( 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index fd9830d9a..8ce8f1a8b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -9,6 +9,7 @@ from requests import Response from requests.exceptions import ConnectionError +from requests.structures import CaseInsensitiveDict from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator @@ -76,43 +77,28 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) - # This loop will eventually exit via one of three conditions: - # 1. Successful response (resp.ok) - # 2. Non-retryable error (4xx status codes except 429) - # 3. 
Retry exponential backoff timeout exhausted and no Retry-After header available while True: resp = self._send(data) if resp.ok: return LogExportResult.SUCCESS - if not self._retryable(resp): - _logger.error( - "Failed to export logs batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) - return LogExportResult.FAILURE - - # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling - maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) - - # Set the next retry delay to the value of the Retry-After response in the headers. - # If Retry-After is not present in the headers, default to the next iteration of the - # exponential backoff strategy. - - delay = self._parse_retryable_header(maybe_retry_after) - - if delay == -1: - delay = next(backoff, self._MAX_RETRY_TIMEOUT) - - if delay == self._MAX_RETRY_TIMEOUT: - _logger.error( - "Transient error %s encountered while exporting logs batch. " - "No Retry-After header found and all backoff retries exhausted. " - "Logs will not be exported.", - resp.reason, - ) + delay = self._get_retry_delay_sec(resp.headers, backoff) + is_retryable = self._retryable(resp) + + if not is_retryable or delay == self._MAX_RETRY_TIMEOUT: + if is_retryable: + _logger.error( + "Failed to export logs due to retries exhausted " + "after transient error %s encountered while exporting logs batch", + resp.reason, + ) + else: + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) return LogExportResult.FAILURE _logger.warning( @@ -152,6 +138,24 @@ def _retryable(resp: Response) -> bool: return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) + def _get_retry_delay_sec(self, headers: CaseInsensitiveDict, backoff) -> float: + """ + Get retry delay in seconds from headers or backoff strategy. 
+ """ + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + maybe_retry_after = headers.get(self._RETRY_AFTER_HEADER, None) + + # Set the next retry delay to the value of the Retry-After response in the headers. + # If Retry-After is not present in the headers, default to the next iteration of the + # exponential backoff strategy. + + delay = self._parse_retryable_header(maybe_retry_after) + + if delay == -1: + delay = next(backoff, self._MAX_RETRY_TIMEOUT) + + return delay + @staticmethod def _parse_retryable_header(retry_header: Optional[str]) -> float: """ diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 365dd1c08..62673c566 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( - AwsBatchLogRecordProcessor, + AwsCloudWatchOtlpBatchLogRecordProcessor, BatchLogExportStrategy, ) from opentelemetry._logs.severity import SeverityNumber @@ -23,7 +23,7 @@ def setUp(self): self.mock_exporter = MagicMock() self.mock_exporter.export.return_value = LogExportResult.SUCCESS - self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter) + self.processor = AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=self.mock_exporter) self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE From 4b7bb0e12dabf7232658c28a1ce5fa846a8d4cca Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:59:05 +0000 Subject: 
[PATCH 13/17] linting --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 4 +++- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index aa181de43..f6ce7fb57 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -22,7 +22,9 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsCloudWatchOtlpBatchLogRecordProcessor +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, +) from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index dc81875bc..0568c9296 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -21,13 +21,14 @@ class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor): This processor 
still exports all logs up to _max_export_batch_size but rather than doing exactly one export, we will estimate log sizes and do multiple batch exports - where each exported batch will have an additonal constraint: + where each exported batch will have an additional constraint: If the batch to be exported will have a data size of > 1 MB: The batch will be split into multiple exports of sub-batches of data size <= 1 MB. A unique case is if the sub-batch is of data size > 1 MB, then the sub-batch will have exactly 1 log in it. """ + _BASE_LOG_BUFFER_BYTE_SIZE = ( 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) From 8c64adbdda1c3e5536ec5dfcee4629b292a56e81 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:00:57 +0000 Subject: [PATCH 14/17] lint fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f6ce7fb57..863ca0dff 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -138,7 +138,7 @@ def _configure(self, **kwargs): # The OpenTelemetry Authors code # Long term, we wish to contribute this to upstream to improve initialization customizability and reduce dependency on # internal logic. 
-def _initialize_components(setup_logging_handler: bool | None = None): +def _initialize_components(setup_logging_handler: Optional[bool] = None): trace_exporters, metric_exporters, log_exporters = _import_exporters( _get_exporter_names("traces"), _get_exporter_names("metrics"), From 01e3fd8da3704cea4e1cc90b6a6f54e3b96a0b0a Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:18:59 +0000 Subject: [PATCH 15/17] update configuration and tests --- .../distro/aws_opentelemetry_configurator.py | 2 +- .../distro/test_aws_opentelementry_configurator.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 863ca0dff..b9fe22afd 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -185,7 +185,7 @@ def _initialize_components(setup_logging_handler: Optional[bool] = None): def _init_logging( exporters: dict[str, Type[LogExporter]], - resource: Resource | None = None, + resource: Optional[Resource] = None, setup_logging_handler: bool = True, ): provider = LoggerProvider(resource=resource) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index dbaee3c33..5b81be9f8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -496,6 +496,7 @@ def test_customize_span_exporter_sigv4(self): OTLPAwsSpanExporter, AwsAuthSession, Compression.NoCompression, + Resource.get_empty(), ) for config in 
bad_configs: @@ -506,6 +507,7 @@ def test_customize_span_exporter_sigv4(self): OTLPSpanExporter, Session, Compression.NoCompression, + Resource.get_empty(), ) self.assertIsInstance( @@ -610,12 +612,12 @@ def test_customize_logs_exporter_sigv4(self): ) self.assertIsInstance( - _customize_logs_exporter(OTLPGrpcLogExporter(), Resource.get_empty()), OTLPGrpcLogExporter + _customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter ) # Need to patch all of these to prevent some weird multi-threading error with the LogProvider @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggingHandler", return_value=MagicMock()) - @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.getLogger", return_value=MagicMock()) + @patch("logging.getLogger", return_value=MagicMock()) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._customize_logs_exporter") @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggerProvider", return_value=MagicMock()) @patch( @@ -832,12 +834,13 @@ def customize_exporter_test( expected_exporter_type, expected_session, expected_compression, + *args ): for key, value in config.items(): os.environ[key] = value try: - result = executor(default_exporter, Resource.get_empty()) + result = executor(default_exporter, *args) self.assertIsInstance(result, expected_exporter_type) self.assertIsInstance(result._session, expected_session) self.assertEqual(result._compression, expected_compression) From 2f0268cf5fd92ef4193998674b32ce18b9e4c5eb Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:21:09 +0000 Subject: [PATCH 16/17] lint fix --- .../distro/test_aws_opentelementry_configurator.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 
5b81be9f8..5bd05677d 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -611,9 +611,7 @@ def test_customize_logs_exporter_sigv4(self): config, _customize_logs_exporter, OTLPLogExporter(), OTLPLogExporter, Session, Compression.NoCompression ) - self.assertIsInstance( - _customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter - ) + self.assertIsInstance(_customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter) # Need to patch all of these to prevent some weird multi-threading error with the LogProvider @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggingHandler", return_value=MagicMock()) @@ -827,14 +825,7 @@ def test_customize_metric_exporter(self): os.environ.pop("OTEL_METRIC_EXPORT_INTERVAL", None) def customize_exporter_test( - self, - config, - executor, - default_exporter, - expected_exporter_type, - expected_session, - expected_compression, - *args + self, config, executor, default_exporter, expected_exporter_type, expected_session, expected_compression, *args ): for key, value in config.items(): os.environ[key] = value From 7dbcb7e9e6721872fab2816415129939e22f2996 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:26:10 +0000 Subject: [PATCH 17/17] linting fix --- .../distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 8ce8f1a8b..845a80ecb 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -8,7 +8,7 @@ from typing import Dict, Optional, Sequence from requests import Response -from requests.exceptions import ConnectionError +from requests.exceptions import ConnectionError as RequestsConnectionError from requests.structures import CaseInsensitiveDict from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession @@ -119,7 +119,7 @@ def _send(self, serialized_data: bytes): cert=self._client_cert, ) return response - except ConnectionError: + except RequestsConnectionError: response = self._session.post( url=self._endpoint, data=serialized_data,