Skip to content

Commit 0ea3ebd

Browse files
authored
Apply DB resource attributes. (#182)
Apply DB resource attributes `RemoteResourceType` and `RemoteResourceIdentifier`, with the following details. This PR has feature parity with ADOT Java: aws-observability/aws-otel-java-instrumentation#805. The only difference is that Python, unlike Java, does not allow overloading a function with a different number of parameters, so I'm renaming the second `_build_db_connection` to `_build_db_connection_string`. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 0df8771 commit 0ea3ebd

File tree

3 files changed

+420
-25
lines changed

3 files changed

+420
-25
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
get_egress_operation,
3030
get_ingress_operation,
3131
is_aws_sdk_span,
32+
is_db_span,
3233
is_key_present,
3334
is_local_root,
3435
should_generate_dependency_metric_attributes,
@@ -46,6 +47,8 @@
4647

4748
# Pertinent OTEL attribute keys
4849
_SERVICE_NAME: str = ResourceAttributes.SERVICE_NAME
50+
_DB_CONNECTION_STRING: str = SpanAttributes.DB_CONNECTION_STRING
51+
_DB_NAME: str = SpanAttributes.DB_NAME
4952
_DB_OPERATION: str = SpanAttributes.DB_OPERATION
5053
_DB_STATEMENT: str = SpanAttributes.DB_STATEMENT
5154
_DB_SYSTEM: str = SpanAttributes.DB_SYSTEM
@@ -63,6 +66,10 @@
6366
_PEER_SERVICE: str = SpanAttributes.PEER_SERVICE
6467
_RPC_METHOD: str = SpanAttributes.RPC_METHOD
6568
_RPC_SERVICE: str = SpanAttributes.RPC_SERVICE
69+
_SERVER_ADDRESS: str = SpanAttributes.SERVER_ADDRESS
70+
_SERVER_PORT: str = SpanAttributes.SERVER_PORT
71+
_SERVER_SOCKET_ADDRESS: str = SpanAttributes.SERVER_SOCKET_ADDRESS
72+
_SERVER_SOCKET_PORT: str = SpanAttributes.SERVER_SOCKET_PORT
6673
_AWS_TABLE_NAMES: str = SpanAttributes.AWS_DYNAMODB_TABLE_NAMES
6774
_AWS_BUCKET_NAME: str = SpanAttributes.AWS_S3_BUCKET
6875

@@ -71,6 +78,7 @@
7178
_NORMALIZED_KINESIS_SERVICE_NAME: str = "AWS::Kinesis"
7279
_NORMALIZED_S3_SERVICE_NAME: str = "AWS::S3"
7380
_NORMALIZED_SQS_SERVICE_NAME: str = "AWS::SQS"
81+
_DB_CONNECTION_STRING_TYPE: str = "DB::Connection"
7482

7583
# Special DEPENDENCY attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
7684
_GRAPHQL: str = "graphql"
@@ -207,7 +215,7 @@ def _set_remote_service_and_operation(span: ReadableSpan, attributes: BoundedAtt
207215
elif is_key_present(span, _RPC_SERVICE) or is_key_present(span, _RPC_METHOD):
208216
remote_service = _normalize_remote_service_name(span, _get_remote_service(span, _RPC_SERVICE))
209217
remote_operation = _get_remote_operation(span, _RPC_METHOD)
210-
elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION) or is_key_present(span, _DB_STATEMENT):
218+
elif is_db_span(span):
211219
remote_service = _get_remote_service(span, _DB_SYSTEM)
212220
if is_key_present(span, _DB_OPERATION):
213221
remote_operation = _get_remote_operation(span, _DB_OPERATION)
@@ -336,6 +344,7 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
336344
Remote resource attributes {@link AwsAttributeKeys#AWS_REMOTE_RESOURCE_TYPE} and {@link
337345
AwsAttributeKeys#AWS_REMOTE_RESOURCE_IDENTIFIER} are used to store information about the resource associated with
338346
the remote invocation, such as S3 bucket name, etc. We should only ever set both type and identifier or neither.
347+
If any identifier value contains | or ^ , they will be replaced with ^| or ^^.
339348
340349
AWS resources type and identifier adhere to <a
341350
href="https://docs.aws.amazon.com/cloudcontrolapi/latest/userguide/supported-resources.html">AWS Cloud Control
@@ -344,28 +353,104 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
344353
remote_resource_type: Optional[str] = None
345354
remote_resource_identifier: Optional[str] = None
346355

347-
# Only extract the table name when _AWS_TABLE_NAMES has size equals to one
348-
if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1:
349-
remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table"
350-
remote_resource_identifier = span.attributes.get(_AWS_TABLE_NAMES)[0]
351-
elif is_key_present(span, AWS_STREAM_NAME):
352-
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
353-
remote_resource_identifier = span.attributes.get(AWS_STREAM_NAME)
354-
elif is_key_present(span, _AWS_BUCKET_NAME):
355-
remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket"
356-
remote_resource_identifier = span.attributes.get(_AWS_BUCKET_NAME)
357-
elif is_key_present(span, AWS_QUEUE_NAME):
358-
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
359-
remote_resource_identifier = span.attributes.get(AWS_QUEUE_NAME)
360-
elif is_key_present(span, AWS_QUEUE_URL):
361-
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
362-
remote_resource_identifier = SqsUrlParser.get_queue_name(span.attributes.get(AWS_QUEUE_URL))
356+
if is_aws_sdk_span(span):
357+
# Only extract the table name when _AWS_TABLE_NAMES has size equals to one
358+
if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1:
359+
remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table"
360+
remote_resource_identifier = _escape_delimiters(span.attributes.get(_AWS_TABLE_NAMES)[0])
361+
elif is_key_present(span, AWS_STREAM_NAME):
362+
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
363+
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_STREAM_NAME))
364+
elif is_key_present(span, _AWS_BUCKET_NAME):
365+
remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket"
366+
remote_resource_identifier = _escape_delimiters(span.attributes.get(_AWS_BUCKET_NAME))
367+
elif is_key_present(span, AWS_QUEUE_NAME):
368+
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
369+
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_QUEUE_NAME))
370+
elif is_key_present(span, AWS_QUEUE_URL):
371+
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
372+
remote_resource_identifier = _escape_delimiters(
373+
SqsUrlParser.get_queue_name(span.attributes.get(AWS_QUEUE_URL))
374+
)
375+
elif is_db_span(span):
376+
remote_resource_type = _DB_CONNECTION_STRING_TYPE
377+
remote_resource_identifier = _get_db_connection(span)
363378

364379
if remote_resource_type is not None and remote_resource_identifier is not None:
365380
attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type
366381
attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier
367382

368383

384+
def _get_db_connection(span: ReadableSpan) -> Optional[str]:
    """
    Build the RemoteResourceIdentifier value for a database span.

    RemoteResourceIdentifier is populated with rule:
        ^[{db.name}|]?{address}[|{port}]?

    {address} attribute is retrieved in priority order:
    - {SpanAttributes.SERVER_ADDRESS},
    - {SpanAttributes.NET_PEER_NAME},
    - {SpanAttributes.SERVER_SOCKET_ADDRESS},
    - {SpanAttributes.DB_CONNECTION_STRING}-Hostname

    {port} attribute is retrieved in priority order:
    - {SpanAttributes.SERVER_PORT},
    - {SpanAttributes.NET_PEER_PORT},
    - {SpanAttributes.SERVER_SOCKET_PORT},
    - {SpanAttributes.DB_CONNECTION_STRING}-Port

    The port is always taken from the same source as the address; a port attribute from a
    lower-priority source is never combined with a higher-priority address.

    Returns the identifier string, or None if no address could be determined. When None is
    returned, neither RemoteResourceType nor RemoteResourceIdentifier will be provided.
    """
    db_name: Optional[str] = span.attributes.get(_DB_NAME)
    db_connection: Optional[str] = None

    # (address key, port key) pairs, highest priority first.
    address_port_keys = (
        (_SERVER_ADDRESS, _SERVER_PORT),
        (_NET_PEER_NAME, _NET_PEER_PORT),
        (_SERVER_SOCKET_ADDRESS, _SERVER_SOCKET_PORT),
    )

    for address_key, port_key in address_port_keys:
        if is_key_present(span, address_key):
            db_connection = _build_db_connection(span.attributes.get(address_key), span.attributes.get(port_key))
            break
    else:
        # No explicit address attribute was present: fall back to parsing the connection string.
        if is_key_present(span, _DB_CONNECTION_STRING):
            db_connection = _build_db_connection_string(span.attributes.get(_DB_CONNECTION_STRING))

    # The database name is only a prefix; it is never emitted without an address.
    if db_connection and db_name:
        db_connection = _escape_delimiters(db_name) + "|" + db_connection

    return db_connection
426+
427+
428+
def _build_db_connection(address: str, port: int) -> Optional[str]:
429+
return _escape_delimiters(address) + ("|" + str(port) if port else "")
430+
431+
432+
def _build_db_connection_string(connection_string: str) -> Optional[str]:
433+
434+
uri = urlparse(connection_string)
435+
address = uri.hostname
436+
try:
437+
port = uri.port
438+
except ValueError:
439+
port = None
440+
441+
if address is None:
442+
return None
443+
444+
port_str = "|" + str(port) if port is not None and port != -1 else ""
445+
return _escape_delimiters(address) + port_str
446+
447+
448+
def _escape_delimiters(input_str: str) -> Optional[str]:
449+
if input_str is None:
450+
return None
451+
return input_str.replace("^", "^^").replace("|", "^|")
452+
453+
369454
def _set_span_kind_for_dependency(span: ReadableSpan, attributes: BoundedAttributes) -> None:
370455
span_kind: str = span.kind.name
371456
attributes[AWS_SPAN_KIND] = span_kind

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,15 @@ def is_aws_sdk_span(span: ReadableSpan) -> bool:
8585
return "aws-api" == span.attributes.get(SpanAttributes.RPC_SYSTEM)
8686

8787

88+
# Check if the current Span adheres to database semantic conventions
89+
def is_db_span(span: ReadableSpan) -> bool:
    """
    Report whether the span carries any of the database attributes checked here.

    Returns True as soon as one of SpanAttributes.DB_SYSTEM, SpanAttributes.DB_OPERATION or
    SpanAttributes.DB_STATEMENT is present on the span (checked in that order).
    """
    db_attribute_keys = (
        SpanAttributes.DB_SYSTEM,
        SpanAttributes.DB_OPERATION,
        SpanAttributes.DB_STATEMENT,
    )
    return any(is_key_present(span, attribute_key) for attribute_key in db_attribute_keys)
)
95+
96+
8897
def should_generate_service_metric_attributes(span: ReadableSpan) -> bool:
8998
return (is_local_root(span) and not _is_boto3sqs_span(span)) or SpanKind.SERVER == span.kind
9099

0 commit comments

Comments
 (0)