diff --git a/CHANGELOG.md b/CHANGELOG.md index d00a078cb8..e0bd639147 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Forward logs to Kafka directly instead of serialized as envelope. ([#4875](https://github.com/getsentry/relay/pull/4875)) - Add `gen_ai.response.tokens_per_second` span attribute on AI spans. ([#4883](https://github.com/getsentry/relay/pull/4883)) - Add support for playstation data requests. ([#4870](https://github.com/getsentry/relay/pull/4870)) +- Add snapshot testing for python integration tests. ([#4877](https://github.com/getsentry/relay/pull/4877)) ## 25.6.2 diff --git a/Makefile b/Makefile index 6309b052de..3c5fce8c0c 100644 --- a/Makefile +++ b/Makefile @@ -76,6 +76,24 @@ test-integration: build setup-venv ## run integration tests .venv/bin/pytest tests -n $(PYTEST_N) -v .PHONY: test-integration +snapshots-review: build setup-venv ## review all snapshots for files that have been modified + @changed_py=$$(git diff --name-only | grep '\.py$$'); \ + if [ -z "$$changed_py" ]; then \ + echo "No Python files changed — skipping insta review"; \ + else \ + .venv/bin/pytest -s --insta review $$changed_py; \ + fi +.PHONY: insta-review + +snapshots-accept: build setup-venv ## accept all new snapshots for files that have been modified + @changed_py=$$(git diff --name-only | grep '\.py$$'); \ + if [ -z "$$changed_py" ]; then \ + echo "No Python files changed — no snapshots updated"; \ + else \ + .venv/bin/pytest -s --insta update $$changed_py; \ + fi +.PHONY: insta-accept + # Documentation doc: doc-rust ## generate all API docs diff --git a/requirements-dev.txt b/requirements-dev.txt index 000eb07004..72f12f5861 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -13,6 +13,7 @@ flask==3.0.3 msgpack==1.1.0 opentelemetry-proto==1.32.1 pytest-localserver==0.8.1 +pytest-insta==0.3.0 pytest-sentry==0.3.0 pytest-xdist==3.5.0 pytest==7.4.3 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cc147c942d..f45625fa3f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -263,3 +263,109 @@ def redis_client(): @pytest.fixture def secondary_redis_client(): return redis.Redis(host="127.0.0.1", port=6380, db=0) + + +def redact_snapshot(data, exclude_keys=None, additional_keys=None): + """ + :param data: the data that will be redacted. + :param set exclude_keys: overwrites the default keys that will be redacted. + :param set additional_keys: keys in this set are added to the list of default keys that are redacted. + """ + if exclude_keys is None: + exclude_keys = {"timestamp", "received", "ingest_path", "event_id"} + else: + exclude_keys = set(exclude_keys) + + if additional_keys is not None: + exclude_keys = exclude_keys.union(additional_keys) + + def _redact_entire_subtree(obj, parent_key): + if isinstance(obj, dict): + return {k: _redact_entire_subtree(v, parent_key=k) for k, v in obj.items()} + elif isinstance(obj, list): + return [_redact_entire_subtree(item, parent_key=parent_key) for item in obj] + else: + return f"<{parent_key}>" + + def _redact(obj, path=None): + if isinstance(obj, dict): + redacted = {} + for k, v in obj.items(): + full_path = path + f".{k}" if path else k + if full_path in exclude_keys: + redacted[k] = _redact_entire_subtree(v, parent_key=k) + else: + redacted[k] = _redact(v, full_path) + return redacted + elif isinstance(obj, list): + return [_redact(item, path) for item in obj] + else: + return obj + + return _redact(data) + + +@pytest.fixture +def relay_snapshot(snapshot): + """ + Redacts certain fields from data to make them comparable with snapshots. + Fields that contain timestamps or UUIDs are problematic for snapshots because + they will be different each time. + It's possible to always set them to a fixed value before comparing to the snapshot, + but that is annoying and cumbersome. + This function makes sure that those fields remain comparable between runs by replacing + their content with the key name wrapped in `<` and `>`. + + To dynamically change the list of keys that will be redacted, call it like this: + `assert relay_snapshot("json", excluded_keys={"key1", "key2}) == data` + + To extend the default list with custom keys, call it like this: + `assert relay_snapshot("json", additional_keys={"bonusKey}) == data` + + Note that using both parameters is not advised as it will produce the same as just using the + `excluded_keys` parameter. + relay_snapshot("json", excluded_keys={"key1", "key2"}, additional_keys={"key3"}) is the same as + relay_snapshot("json", excluded_keys={"key1", "key2", "key3"}) + + Keys must match the full path to be redacted with every step being separated by a '.' (dot). + There is no special syntax for lists, instead it will apply to all elements in that list. + For example, "foo" will only match the top level item with the key "foo" while "bar.foo" will + only match the key "foo" that is a child of "bar". + + Specified keys do not have to be leaf nodes. If an intermediate key is specified, it will + redact the entire subtree and replace their values with the respective key name. + For example: + { + "name": "test", + "foo": { + "dynamicBar": "14412", + "dynamicBaz": "57712" + } + } + + will be redacted into + { + "name": "test", + "foo": { + "dynamicBar": "", + "dynamicBaz": "" + } + } + """ + + class SnapshotWrapper: + def __call__(self, name, exclude_keys=None, additional_keys=None): + self.name = name + self.exclude_keys = exclude_keys + self.additional_keys = additional_keys + return self + + def __eq__(self, other): + redacted = redact_snapshot( + other, + exclude_keys=self.exclude_keys, + additional_keys=self.additional_keys, + ) + return snapshot(self.name) == redacted + + return SnapshotWrapper() diff --git a/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json new file mode 100644 index 0000000000..c5df0f5498 --- /dev/null +++ b/tests/integration/snapshots/pii__snapshot_scrub_span_sentry_tags_advanced_rules__0.json @@ -0,0 +1,78 @@ +{ + "event_id": "", + "level": "error", + "version": "5", + "type": "default", + "logger": "", + "platform": "other", + "timestamp": "", + "received": "", + "user": { + "ip_address": "2.125.160.216", + "sentry_user": "ip:2.125.160.216", + "geo": { + "country_code": "GB", + "city": "Boxford", + "subdivision": "England", + "region": "United Kingdom" + } + }, + "sdk": { + "name": "raven-node", + "version": "2.6.3" + }, + "ingest_path": [ + { + "version": "", + "public_key": "" + } + ], + "key_id": "123", + "project": 42, + "spans": [ + { + "timestamp": 1746007551.0, + "start_timestamp": 1746007545.0, + "span_id": "aaaaaaaa00000000", + "trace_id": "aaaaaaaaaaaaaaaaaaaa000000000000", + "sentry_tags": { + "user.geo.country_code": "**", + "user.geo.subregion": "**" + } + } + ], + "_meta": { + "spans": { + "0": { + "sentry_tags": { + "user.geo.country_code": { + "": { + "rem": [ + [ + "@anything:mask", + "m", + 0, + 2 + ] + ], + "len": 2 + } + }, + "user.geo.subregion": { + "": { + "rem": [ + [ + "@anything:mask", + "m", + 0, + 2 + ] + ], + "len": 2 + } + } + } + } + } + } +} diff --git a/tests/integration/test_pii.py b/tests/integration/test_pii.py index 17c8965771..1c849a60ed 100644 --- a/tests/integration/test_pii.py +++ b/tests/integration/test_pii.py @@ -1,4 +1,6 @@ -def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): +def test_snapshot_scrub_span_sentry_tags_advanced_rules( + mini_sentry, relay, relay_snapshot +): project_id = 42 relay = relay( mini_sentry, @@ -33,5 +35,4 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay): envelope = mini_sentry.captured_events.get(timeout=1) event = envelope.get_event() - assert event["spans"][0]["sentry_tags"]["user.geo.country_code"] == "**" - assert event["spans"][0]["sentry_tags"]["user.geo.subregion"] == "**" + assert relay_snapshot("json") == event