Skip to content

fix(tracer): error when encoding bytes and adding a string (#13419) [backport 2.21] #13819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: 2.21
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ddtrace/_trace/_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

MAX_SPAN_META_KEY_LEN = 200
MAX_SPAN_META_VALUE_LEN = 25000
TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500
21 changes: 20 additions & 1 deletion ddtrace/internal/_encoding.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from cpython cimport *
from cpython.bytearray cimport PyByteArray_CheckExact
from .._trace._limits import MAX_SPAN_META_VALUE_LEN
from ddtrace._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
from libc cimport stdint
from libc.string cimport strlen

Expand Down Expand Up @@ -93,6 +95,15 @@ cdef inline int array_prefix_size(stdint.uint32_t l):
return MSGPACK_ARRAY_LENGTH_PREFIX_SIZE


cdef inline object truncate_string(object string):
    """Shorten an oversized bytes-like or unicode value and mark it as truncated.

    A falsy value, or one whose length does not exceed
    MAX_SPAN_META_VALUE_LEN, is returned unchanged. A longer value is cut to
    its first ``TRUNCATED_SPAN_ATTRIBUTE_LEN - 14`` items and the 14-character
    marker ``<truncated>...`` is appended. The marker is a bytes literal for
    bytes-like input and a text literal for unicode input, so the
    concatenation never mixes the two types. An oversized object that is
    neither bytes-like nor unicode is returned as-is.
    """
    if not string or len(string) <= MAX_SPAN_META_VALUE_LEN:
        return string

    # Leave room for the 14-character marker appended below.
    keep = TRUNCATED_SPAN_ATTRIBUTE_LEN - 14
    if PyBytesLike_Check(string):
        return string[:keep] + b"<truncated>..."
    if PyUnicode_Check(string):
        return string[:keep] + "<truncated>..."
    return string


cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
cdef int ret

Expand Down Expand Up @@ -129,14 +140,18 @@ cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:

if PyBytesLike_Check(text):
L = len(text)
if L > ITEM_LIMIT:
if L > MAX_SPAN_META_VALUE_LEN:
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(text).tp_name)
text = truncate_string(text)
L = len(text)
ret = msgpack_pack_raw(pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(pk, <char *> text, L)
return ret

if PyUnicode_Check(text):
if len(text) > MAX_SPAN_META_VALUE_LEN:
text = truncate_string(text)
IF PY_MAJOR_VERSION >= 3:
ret = msgpack_pack_unicode(pk, text, ITEM_LIMIT)
if ret == -2:
Expand Down Expand Up @@ -248,6 +263,9 @@ cdef class MsgpackStringTable(StringTable):
cdef insert(self, object string):
cdef int ret

# Before inserting, truncate the string if it is longer than MAX_SPAN_META_VALUE_LEN
string = truncate_string(string)

if len(string) > self._max_string_length:
string = "<dropped string of length %d because it's too long (max allowed length %d)>" % (
len(string), self._max_string_length
Expand Down Expand Up @@ -846,6 +864,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
raise

cdef inline int _pack_string(self, object string) except? -1:
    # Truncate first, so the string table is asked for the index of the
    # shortened value; that index is then packed as a uint32.
    shortened = truncate_string(string)
    return msgpack_pack_uint32(&self.pk, self._st._index(shortened))

cdef void * get_dd_origin_ref(self, str dd_origin):
Expand Down
4 changes: 4 additions & 0 deletions releasenotes/notes/encode-bytes-974d93cec3725455.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
tracing: This resolves a ``TypeError`` in encoding when truncating a large bytes object.
58 changes: 46 additions & 12 deletions tests/integration/test_integration_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from ddtrace.trace import Tracer
from ddtrace.trace import tracer
from tests.integration.utils import AGENT_VERSION
from tests.integration.utils import mark_snapshot
from tests.integration.utils import parametrize_with_all_encodings
from tests.utils import override_global_config
from tests.utils import snapshot

Expand Down Expand Up @@ -275,14 +273,50 @@ def test_snapshot_skip():
pass


@parametrize_with_all_encodings
@mark_snapshot
def test_setting_span_tags_and_metrics_generates_no_error_logs():
import ddtrace
@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_setting_span_tags_and_metrics_generates_no_error_logs(encoding):
from ddtrace import tracer

s = ddtrace.tracer.trace("operation", service="my-svc")
s.set_tag("env", "my-env")
s.set_metric("number1", 123)
s.set_metric("number2", 12.0)
s.set_metric("number3", "1")
s.finish()
with override_global_config(dict(_trace_api=encoding)):
s = tracer.trace("operation", service="my-svc")
s.set_tag("env", "my-env")
s.set_metric("number1", 123)
s.set_metric("number2", 12.0)
s.set_metric("number3", "1")
s.finish()


@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_encode_span_with_large_string_attributes(encoding):
    """Snapshot a span built from very long ASCII ``str`` attributes.

    The span name is 25000 characters long, the resource and the tag key are
    25001 characters each, and the tag value is 2000 characters. The test runs
    once per trace API version supplied by the ``encoding`` parameter.
    """
    from ddtrace import tracer

    span_name = "a" * 25000
    span_resource = "b" * 25001
    tag_key = "c" * 25001
    tag_value = "d" * 2000

    with override_global_config({"_trace_api": encoding}):
        with tracer.trace(name=span_name, resource=span_resource) as span:
            span.set_tag(key=tag_key, value=tag_value)


@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_encode_span_with_large_bytes_attributes(encoding):
    """Snapshot a span built from very long ``bytes`` attributes.

    The span name is 25000 bytes long, the resource and the tag key are 25001
    bytes each, and the tag value is 2000 bytes. The test runs once per trace
    API version supplied by the ``encoding`` parameter.
    """
    from ddtrace import tracer

    with override_global_config({"_trace_api": encoding}):
        with tracer.trace(name=b"a" * 25000, resource=b"b" * 25001) as span:
            span.set_tag(key=b"c" * 25001, value=b"d" * 2000)


@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@pytest.mark.snapshot()
def test_encode_span_with_large_unicode_string_attributes(encoding):
    """Snapshot a span built from very long non-ASCII ``str`` attributes.

    The span name is 25000 characters long, the resource and the tag key are
    25001 characters each, and the tag value is 2000 characters; every
    character is an accented letter outside the ASCII range. The test runs
    once per trace API version supplied by the ``encoding`` parameter.
    """
    from ddtrace import tracer

    span_name = "á" * 25000
    span_resource = "â" * 25001
    tag_key = "å" * 25001
    tag_value = "ä" * 2000

    with override_global_config({"_trace_api": encoding}):
        with tracer.trace(name=span_name, resource=span_resource) as span:
            span.set_tag(key=tag_key, value=tag_value)

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading