From 404fe09018b1a1447d7d1056343ab344f2f9fc06 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Mon, 16 Jun 2025 09:50:00 +0200 Subject: [PATCH 1/7] feat(relay): introduce snapshot testing for integration tests --- requirements-dev.txt | 1 + tests/integration/conftest.py | 49 +++++++++++++++++++++++++++++++++++ tests/integration/test_pii.py | 38 +++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3e57dc19220..6ba3a26d7d4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -24,6 +24,7 @@ setuptools==70.0.0 werkzeug==3.0.6 zstandard==0.18.0 sentry_protos==0.2.0 +pytest-insta==0.3.0 # typing things mypy==1.10.0 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 17f68e36724..68a1d3d1ed6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -263,3 +263,52 @@ def redis_client(): @pytest.fixture def secondary_redis_client(): return redis.Redis(host="127.0.0.1", port=6380, db=0) + + +import pytest + + +def redact_snapshot(data, exclude_keys=None): + if exclude_keys is None: + exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} + else: + exclude_keys = set(exclude_keys) + + def _redact_entire_subtree(obj): + if isinstance(obj, dict): + return {k: f"<{k}>" for k in obj} + elif isinstance(obj, list): + return [_redact_entire_subtree(item) for item in obj] + else: + return "" + + def _redact(obj): + if isinstance(obj, dict): + redacted = {} + for k, v in obj.items(): + if k in exclude_keys: + redacted[k] = _redact_entire_subtree(v) + else: + redacted[k] = _redact(v) + return redacted + elif isinstance(obj, list): + return [_redact(item) for item in obj] + else: + return obj + + return _redact(data) + + +@pytest.fixture +def my_snapshot(snapshot): + class SnapshotWrapper: + def __call__(self, name, exclude_keys=None): + self.name = name + self.exclude_keys = exclude_keys + return self + + def __eq__(self, other): + redacted = redact_snapshot(other, exclude_keys=self.exclude_keys) + return snapshot(self.name) == redacted + + return SnapshotWrapper() diff --git a/tests/integration/test_pii.py b/tests/integration/test_pii.py index 17c89657710..9215c6d0f77 100644 --- a/tests/integration/test_pii.py +++ b/tests/integration/test_pii.py @@ -35,3 +35,41 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): event = envelope.get_event() assert event["spans"][0]["sentry_tags"]["user.geo.country_code"] == "**" assert event["spans"][0]["sentry_tags"]["user.geo.subregion"] == "**" + + +def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay, my_snapshot): + project_id = 42 + relay = relay( + mini_sentry, + options={"geoip": {"path": "tests/fixtures/GeoIP2-Enterprise-Test.mmdb"}}, + ) + config = mini_sentry.add_basic_project_config(project_id) + config["config"]["piiConfig"]["applications"][ + "$span.sentry_tags.'user.geo.country_code'" + ] = ["@anything:mask"] + config["config"]["piiConfig"]["applications"][ + "$span.sentry_tags.'user.geo.subregion'" + ] = ["@anything:mask"] + + relay.send_event( + project_id, + { + "user": {"ip_address": "2.125.160.216"}, + "spans": [ + { + "timestamp": 1746007551, + "start_timestamp": 1746007545, + "span_id": "aaaaaaaa00000000", + "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", + "sentry_tags": { + "user.geo.country_code": "AT", + "user.geo.subregion": "12", + }, + } + ], + }, + ) + + envelope = mini_sentry.captured_events.get(timeout=1) + event = envelope.get_event() + assert my_snapshot("json") == event From 185374f8b49715f490fd62c3a040f65d75c1e1a6 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Fri, 27 Jun 2025 15:37:58 +0200 Subject: [PATCH 2/7] feat(relay): add snapshot testing for integration tests --- Makefile | 18 ++++++++++++++ requirements-dev.txt | 2 +- tests/integration/conftest.py | 24 +++++++++++------- tests/integration/test_pii.py | 47 ++++------------------------------- 4 files changed, 39 insertions(+), 52 deletions(-) diff --git a/Makefile b/Makefile index 6309b052de8..62ba100d157 100644 --- a/Makefile +++ b/Makefile @@ -76,6 +76,24 @@ test-integration: build setup-venv ## run integration tests .venv/bin/pytest tests -n $(PYTEST_N) -v .PHONY: test-integration +insta-review: build setup-venv ## review all snapshots for files that have been modified + @changed_py=$$(git diff --name-only | grep '\.py$$'); \ + if [ -z "$$changed_py" ]; then \ + echo "No Python files changed — skipping insta review"; \ + else \ + .venv/bin/pytest -s --insta review $$changed_py; \ + fi +.PHONY: insta-review + +insta-accept: build setup-venv ## accept all new snapshots for files that have been modified + @changed_py=$$(git diff --name-only | grep '\.py$$'); \ + if [ -z "$$changed_py" ]; then \ + echo "No Python files changed — no snapshots updated"; \ + else \ + .venv/bin/pytest -s --insta update $$changed_py; \ + fi +.PHONY: insta-accept + # Documentation doc: doc-rust ## generate all API docs diff --git a/requirements-dev.txt b/requirements-dev.txt index 818f1401fd2..72f12f5861e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -13,6 +13,7 @@ flask==3.0.3 msgpack==1.1.0 opentelemetry-proto==1.32.1 pytest-localserver==0.8.1 +pytest-insta==0.3.0 pytest-sentry==0.3.0 pytest-xdist==3.5.0 pytest==7.4.3 @@ -24,7 +25,6 @@ setuptools==70.0.0 werkzeug==3.0.6 zstandard==0.18.0 sentry_protos==0.2.0 -pytest-insta==0.3.0 # typing things mypy==1.10.0 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9b1c6dd9a9b..aeb47be9354 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -265,29 +265,35 @@ def secondary_redis_client(): return redis.Redis(host="127.0.0.1", port=6380, db=0) -import pytest - - def redact_snapshot(data, exclude_keys=None): + """ + Redacts certain fields from data to make them comparable with snapshots. + Fields that contain timestamps or UUIDs are problematic for snapshots because + they will be different each time. + It's possible to always set them to a fixed value before comparing to the snapshot, + but that is annoying and cumbersome. + This function makes sure that those fields remain comparable between runs by replacing + their content with the key name wrapped in `<` and `>`. + """ if exclude_keys is None: exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} else: exclude_keys = set(exclude_keys) - def _redact_entire_subtree(obj): + def _redact_entire_subtree(obj, parent_key=None): if isinstance(obj, dict): - return {k: f"<{k}>" for k in obj} + return {k: _redact_entire_subtree(v, parent_key=k) for k, v in obj.items()} elif isinstance(obj, list): - return [_redact_entire_subtree(item) for item in obj] + return [_redact_entire_subtree(item, parent_key=parent_key) for item in obj] else: - return "" + return f"<{parent_key}>" def _redact(obj): if isinstance(obj, dict): redacted = {} for k, v in obj.items(): if k in exclude_keys: - redacted[k] = _redact_entire_subtree(v) + redacted[k] = _redact_entire_subtree(v, parent_key=k) else: redacted[k] = _redact(v) return redacted @@ -300,7 +306,7 @@ def _redact(obj): @pytest.fixture -def my_snapshot(snapshot): +def relay_snapshot(snapshot): class SnapshotWrapper: def __call__(self, name, exclude_keys=None): self.name = name diff --git a/tests/integration/test_pii.py b/tests/integration/test_pii.py index 9215c6d0f77..4b2a7de7f17 100644 --- a/tests/integration/test_pii.py +++ b/tests/integration/test_pii.py @@ -1,4 +1,6 @@ -def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): +def test_snapshot_scrub_span_sentry_tags_advanced_rules( + mini_sentry, relay, relay_snapshot +): project_id = 42 relay = relay( mini_sentry, @@ -23,7 +25,7 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): "span_id": "aaaaaaaa00000000", "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", "sentry_tags": { - "user.geo.country_code": "AT", + "user.geo.country_code": "ATdasdsa", "user.geo.subregion": "12", }, } @@ -33,43 +35,4 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): envelope = mini_sentry.captured_events.get(timeout=1) event = envelope.get_event() - assert event["spans"][0]["sentry_tags"]["user.geo.country_code"] == "**" - assert event["spans"][0]["sentry_tags"]["user.geo.subregion"] == "**" - - -def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay, my_snapshot): - project_id = 42 - relay = relay( - mini_sentry, - options={"geoip": {"path": "tests/fixtures/GeoIP2-Enterprise-Test.mmdb"}}, - ) - config = mini_sentry.add_basic_project_config(project_id) - config["config"]["piiConfig"]["applications"][ - "$span.sentry_tags.'user.geo.country_code'" - ] = ["@anything:mask"] - config["config"]["piiConfig"]["applications"][ - "$span.sentry_tags.'user.geo.subregion'" - ] = ["@anything:mask"] - - relay.send_event( - project_id, - { - "user": {"ip_address": "2.125.160.216"}, - "spans": [ - { - "timestamp": 1746007551, - "start_timestamp": 1746007545, - "span_id": "aaaaaaaa00000000", - "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", - "sentry_tags": { - "user.geo.country_code": "AT", - "user.geo.subregion": "12", - }, - } - ], - }, - ) - - envelope = mini_sentry.captured_events.get(timeout=1) - event = envelope.get_event() - assert my_snapshot("json") == event + assert relay_snapshot("json") == event From 9d5583ebdb3b267d7a0317222c4f4ae6c325a66d Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Mon, 30 Jun 2025 14:58:30 +0200 Subject: [PATCH 3/7] fix --- tests/integration/test_pii.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_pii.py b/tests/integration/test_pii.py index 4b2a7de7f17..1c849a60edd 100644 --- a/tests/integration/test_pii.py +++ b/tests/integration/test_pii.py @@ -25,7 +25,7 @@ def test_snapshot_scrub_span_sentry_tags_advanced_rules( "span_id": "aaaaaaaa00000000", "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", "sentry_tags": { - "user.geo.country_code": "ATdasdsa", + "user.geo.country_code": "AT", "user.geo.subregion": "12", }, } From 49a229c4a2360aa30111a01247c5b2fb9a303e3b Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Mon, 30 Jun 2025 17:32:22 +0200 Subject: [PATCH 4/7] add snapshot --- ...ub_span_sentry_tags_advanced_rules__0.json | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json diff --git a/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json new file mode 100644 index 00000000000..c8482aa9e71 --- /dev/null +++ b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json @@ -0,0 +1,78 @@ +{ + "event_id": "", + "level": "error", + "version": "5", + "type": "default", + "logger": "", + "platform": "other", + "timestamp": "", + "received": "", + "user": { + "ip_address": "2.125.160.216", + "sentry_user": "ip:2.125.160.216", + "geo": { + "country_code": "GB", + "city": "Boxford", + "subdivision": "England", + "region": "United Kingdom" + } + }, + "sdk": { + "name": "raven-node", + "version": "2.6.3" + }, + "ingest_path": [ + { + "version": "", + "public_key": "" + } + ], + "key_id": "123", + "project": 42, + "spans": [ + { + "timestamp": "", + "start_timestamp": 1746007545.0, + "span_id": "aaaaaaaa00000000", + "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", + "sentry_tags": { + "user.geo.country_code": "**", + "user.geo.subregion": "**" + } + } + ], + "_meta": { + "spans": { + "0": { + "sentry_tags": { + "user.geo.country_code": { + "": { + "rem": [ + [ + "@anything:mask", + "m", + 0, + 2 + ] + ], + "len": 2 + } + }, + "user.geo.subregion": { + "": { + "rem": [ + [ + "@anything:mask", + "m", + 0, + 2 + ] + ], + "len": 2 + } + } + } + } + } + } +} From 8af310fd94ba07206b0f6e20ce88b03e20ba409c Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 1 Jul 2025 15:45:15 +0200 Subject: [PATCH 5/7] add path support and docs --- Makefile | 4 +- tests/integration/conftest.py | 66 ++++++++++++++++--- ...ub_span_sentry_tags_advanced_rules__0.json | 2 +- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 62ba100d157..3c5fce8c0c9 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ test-integration: build setup-venv ## run integration tests .venv/bin/pytest tests -n $(PYTEST_N) -v .PHONY: test-integration -insta-review: build setup-venv ## review all snapshots for files that have been modified +snapshots-review: build setup-venv ## review all snapshots for files that have been modified @changed_py=$$(git diff --name-only | grep '\.py$$'); \ if [ -z "$$changed_py" ]; then \ echo "No Python files changed — skipping insta review"; \ @@ -85,7 +85,7 @@ insta-review: build setup-venv ## review all snapshots for files that have been fi .PHONY: insta-review -insta-accept: build setup-venv ## accept all new snapshots for files that have been modified +snapshots-accept: build setup-venv ## accept all new snapshots for files that have been modified @changed_py=$$(git diff --name-only | grep '\.py$$'); \ if [ -z "$$changed_py" ]; then \ echo "No Python files changed — no snapshots updated"; \ diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index aeb47be9354..f5a69f9910c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -265,7 +265,7 @@ def secondary_redis_client(): return redis.Redis(host="127.0.0.1", port=6380, db=0) -def redact_snapshot(data, exclude_keys=None): +def redact_snapshot(data, exclude_keys=None, additional_keys=None): """ Redacts certain fields from data to make them comparable with snapshots. Fields that contain timestamps or UUIDs are problematic for snapshots because @@ -274,13 +274,57 @@ def redact_snapshot(data, exclude_keys=None): but that is annoying and cumbersome. This function makes sure that those fields remain comparable between runs by replacing their content with the key name wrapped in `<` and `>`. + + To dynamically change the list of keys that will be redacted, call it like this: + `assert relay_snapshot("json", excluded_keys={"key1", "key2}) == data` + + To extend the default list with custom keys, call it like this: + `assert relay_snapshot("json", additional_keys={"bonusKey}) == data` + + Note that using both parameters is not advised as it will produce the same as just using the + `excluded_keys` parameter. + relay_snapshot("json", excluded_keys={"key1", "key2"}, additional_keys={"key3"}) is the same as + relay_snapshot("json", excluded_keys={"key1", "key2", "key3"}) + + Keys must match the full path to be redacted with every step being separated by a '.' (dot). + There is no special syntax for lists, instead it will apply to all elements in that list. + For example, "foo" will only match the top level item with the key "foo" while "bar.foo" will + only match the key "foo" that is a child of "bar". + + Specified keys do not have to be leaf nodes. If an intermediate key is specified, it will + redact the entire subtree and replace their values with the respective key name. + For example: + { + "name": "test", + "foo": { + "dynamicBar": "14412", + "dynamicBaz": "57712" + } + } + + will be redacted into + { + "name": "test", + "foo": { + "dynamicBar": "", + "dynamicBaz": "" + } + } + + :param data: the data that will be redacted. + :param set exclude_keys: overwrites the keys that will be redacted. In other words, the keys in the list here + will be the only keys that will be redacted. + :param set additional_keys: keys in this set are added to the list of default keys that are redacted. """ if exclude_keys is None: exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} else: exclude_keys = set(exclude_keys) - def _redact_entire_subtree(obj, parent_key=None): + if additional_keys is not None: + exclude_keys = exclude_keys.union(additional_keys) + + def _redact_entire_subtree(obj, parent_key): if isinstance(obj, dict): return {k: _redact_entire_subtree(v, parent_key=k) for k, v in obj.items()} elif isinstance(obj, list): @@ -288,17 +332,18 @@ def _redact_entire_subtree(obj, parent_key=None): else: return f"<{parent_key}>" - def _redact(obj): + def _redact(obj, path=None): if isinstance(obj, dict): redacted = {} for k, v in obj.items(): - if k in exclude_keys: + full_path = path + f".{k}" if path else k + if full_path in exclude_keys: redacted[k] = _redact_entire_subtree(v, parent_key=k) else: - redacted[k] = _redact(v) + redacted[k] = _redact(v, full_path) return redacted elif isinstance(obj, list): - return [_redact(item) for item in obj] + return [_redact(item, path) for item in obj] else: return obj @@ -308,13 +353,18 @@ def _redact(obj): @pytest.fixture def relay_snapshot(snapshot): class SnapshotWrapper: - def __call__(self, name, exclude_keys=None): + def __call__(self, name, exclude_keys=None, additional_keys=None): self.name = name self.exclude_keys = exclude_keys + self.additional_keys = additional_keys return self def __eq__(self, other): - redacted = redact_snapshot(other, exclude_keys=self.exclude_keys) + redacted = redact_snapshot( + other, + exclude_keys=self.exclude_keys, + additional_keys=self.additional_keys, + ) return snapshot(self.name) == redacted return SnapshotWrapper() diff --git a/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json index c8482aa9e71..c5df0f54988 100644 --- a/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json +++ b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json @@ -31,7 +31,7 @@ "project": 42, "spans": [ { - "timestamp": "", + "timestamp": 1746007551.0, "start_timestamp": 1746007545.0, "span_id": "aaaaaaaa00000000", "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", From b243cc3a95e2ff3581b33a1cbf44f048a9e418b0 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Wed, 2 Jul 2025 16:48:22 +0200 Subject: [PATCH 6/7] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d00a078cb80..e0bd639147a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Forward logs to Kafka directly instead of serialized as envelope. ([#4875](https://github.com/getsentry/relay/pull/4875)) - Add `gen_ai.response.tokens_per_second` span attribute on AI spans. ([#4883](https://github.com/getsentry/relay/pull/4883)) - Add support for playstation data requests. ([#4870](https://github.com/getsentry/relay/pull/4870)) +- Add snapshot testing for python integration tests. ([#4877](https://github.com/getsentry/relay/pull/4877)) ## 25.6.2 From 955d01f426d554669ed09da907dd25736c3f99d9 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Wed, 2 Jul 2025 17:04:23 +0200 Subject: [PATCH 7/7] move doc --- tests/integration/conftest.py | 81 ++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f5a69f9910c..f45625fa3f1 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -266,6 +266,47 @@ def secondary_redis_client(): def redact_snapshot(data, exclude_keys=None, additional_keys=None): + """ + :param data: the data that will be redacted. + :param set exclude_keys: overwrites the default keys that will be redacted. + :param set additional_keys: keys in this set are added to the list of default keys that are redacted. + """ + if exclude_keys is None: + exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} + else: + exclude_keys = set(exclude_keys) + + if additional_keys is not None: + exclude_keys = exclude_keys.union(additional_keys) + + def _redact_entire_subtree(obj, parent_key): + if isinstance(obj, dict): + return {k: _redact_entire_subtree(v, parent_key=k) for k, v in obj.items()} + elif isinstance(obj, list): + return [_redact_entire_subtree(item, parent_key=parent_key) for item in obj] + else: + return f"<{parent_key}>" + + def _redact(obj, path=None): + if isinstance(obj, dict): + redacted = {} + for k, v in obj.items(): + full_path = path + f".{k}" if path else k + if full_path in exclude_keys: + redacted[k] = _redact_entire_subtree(v, parent_key=k) + else: + redacted[k] = _redact(v, full_path) + return redacted + elif isinstance(obj, list): + return [_redact(item, path) for item in obj] + else: + return obj + + return _redact(data) + + +@pytest.fixture +def relay_snapshot(snapshot): """ Redacts certain fields from data to make them comparable with snapshots. Fields that contain timestamps or UUIDs are problematic for snapshots because @@ -310,48 +351,8 @@ def redact_snapshot(data, exclude_keys=None, additional_keys=None): "dynamicBaz": "" } } - - :param data: the data that will be redacted. - :param set exclude_keys: overwrites the keys that will be redacted. In other words, the keys in the list here - will be the only keys that will be redacted. - :param set additional_keys: keys in this set are added to the list of default keys that are redacted. """ - if exclude_keys is None: - exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} - else: - exclude_keys = set(exclude_keys) - - if additional_keys is not None: - exclude_keys = exclude_keys.union(additional_keys) - - def _redact_entire_subtree(obj, parent_key): - if isinstance(obj, dict): - return {k: _redact_entire_subtree(v, parent_key=k) for k, v in obj.items()} - elif isinstance(obj, list): - return [_redact_entire_subtree(item, parent_key=parent_key) for item in obj] - else: - return f"<{parent_key}>" - def _redact(obj, path=None): - if isinstance(obj, dict): - redacted = {} - for k, v in obj.items(): - full_path = path + f".{k}" if path else k - if full_path in exclude_keys: - redacted[k] = _redact_entire_subtree(v, parent_key=k) - else: - redacted[k] = _redact(v, full_path) - return redacted - elif isinstance(obj, list): - return [_redact(item, path) for item in obj] - else: - return obj - - return _redact(data) - - -@pytest.fixture -def relay_snapshot(snapshot): class SnapshotWrapper: def __call__(self, name, exclude_keys=None, additional_keys=None): self.name = name