From c54990c861b6d39a539e40fa676e27375910ec35 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 18:44:29 -0700 Subject: [PATCH 1/6] fix(plugin-chart-ag-grid-table): validate filter values/operators in state converter (#40623) Co-authored-by: Claude Code --- .../src/stateConversion.ts | 30 ++++++-- .../test/stateConversion.test.ts | 73 +++++++++++++++++++ 2 files changed, 95 insertions(+), 8 deletions(-) create mode 100644 superset-frontend/plugins/plugin-chart-ag-grid-table/test/stateConversion.test.ts diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/stateConversion.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/stateConversion.ts index bc04be642215..0bbbd26b8aa0 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/stateConversion.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/stateConversion.ts @@ -216,7 +216,13 @@ function convertFilterToSQL( if (conditions.length === 0) return null; if (conditions.length === 1) return conditions[0]; - return `(${conditions.join(` ${filter.operator} `)})`; + // The join operator is interpolated into raw SQL, so only allow the two + // boolean connectors AG Grid produces. + const joinOperator = String(filter.operator).toUpperCase(); + if (joinOperator !== 'AND' && joinOperator !== 'OR') { + return null; + } + return `(${conditions.join(` ${joinOperator} `)})`; } // Handle blank/notBlank operators for all filter types @@ -263,20 +269,26 @@ function convertFilterToSQL( filter.filter !== undefined && filter.type ) { + // Number values are interpolated into the clause without quoting, so they + // must be finite numbers. Coerce and skip the filter if it is not numeric. + const numericValue = Number(filter.filter); + if (!Number.isFinite(numericValue)) { + return null; + } // Map number filter types to SQL operators switch (filter.type) { case FILTER_OPERATORS.EQUALS: - return `${colId} ${SQL_OPERATORS.EQUALS} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.EQUALS} ${numericValue}`; case FILTER_OPERATORS.NOT_EQUAL: - return `${colId} ${SQL_OPERATORS.NOT_EQUALS} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.NOT_EQUALS} ${numericValue}`; case FILTER_OPERATORS.LESS_THAN: - return `${colId} ${SQL_OPERATORS.LESS_THAN} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.LESS_THAN} ${numericValue}`; case FILTER_OPERATORS.LESS_THAN_OR_EQUAL: - return `${colId} ${SQL_OPERATORS.LESS_THAN_OR_EQUAL} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.LESS_THAN_OR_EQUAL} ${numericValue}`; case FILTER_OPERATORS.GREATER_THAN: - return `${colId} ${SQL_OPERATORS.GREATER_THAN} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.GREATER_THAN} ${numericValue}`; case FILTER_OPERATORS.GREATER_THAN_OR_EQUAL: - return `${colId} ${SQL_OPERATORS.GREATER_THAN_OR_EQUAL} ${filter.filter}`; + return `${colId} ${SQL_OPERATORS.GREATER_THAN_OR_EQUAL} ${numericValue}`; default: return null; } @@ -314,7 +326,9 @@ export function convertFilterModel( return undefined; } - const sqlClauses: Record = {}; + // Null-prototype object: column ids come from the (user-influenced) filter + // model and are used as keys here, so avoid prototype-chain keys. + const sqlClauses: Record = Object.create(null); Object.entries(filterModel).forEach(([colId, filter]) => { const sqlClause = convertFilterToSQL(colId, filter); diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/test/stateConversion.test.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/test/stateConversion.test.ts new file mode 100644 index 000000000000..d01c6118c9af --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/test/stateConversion.test.ts @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { convertFilterModel } from '../src/stateConversion'; + +describe('convertFilterModel', () => { + test('emits a clause for a valid numeric comparison filter', () => { + const result = convertFilterModel({ + revenue: { filterType: 'number', type: 'greaterThan', filter: 100 }, + } as any); + + expect(result?.sqlClauses?.revenue).toBe('revenue > 100'); + }); + + test('drops a number filter whose value is not numeric', () => { + const result = convertFilterModel({ + revenue: { filterType: 'number', type: 'equals', filter: '1 OR 1=1' }, + } as any); + + expect(result).toBeUndefined(); + }); + + test('joins conditions with a valid boolean operator', () => { + const result = convertFilterModel({ + revenue: { + filterType: 'number', + operator: 'AND', + condition1: { filterType: 'number', type: 'greaterThan', filter: 1 }, + condition2: { filterType: 'number', type: 'lessThan', filter: 9 }, + }, + } as any); + + expect(result?.sqlClauses?.revenue).toBe('(revenue > 1 AND revenue < 9)'); + }); + + test('drops a compound filter whose join operator is not AND/OR', () => { + const result = convertFilterModel({ + revenue: { + filterType: 'number', + operator: 'AND 1=1) OR (1=1', + condition1: { filterType: 'number', type: 'greaterThan', filter: 1 }, + condition2: { filterType: 'number', type: 'lessThan', filter: 9 }, + }, + } as any); + + expect(result).toBeUndefined(); + }); + + test('stores clauses on a null-prototype map (prototype-safe column ids)', () => { + const result = convertFilterModel({ + constructor: { filterType: 'number', type: 'equals', filter: 5 }, + } as any); + + // The map has no prototype, so a column id like "constructor" is just data. + expect(Object.getPrototypeOf(result?.sqlClauses)).toBeNull(); + expect(result?.sqlClauses?.constructor).toBe('constructor = 5'); + }); +}); From ac522ded1c46a3c27441b992bad7f411bbc97c35 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 21:19:24 -0700 Subject: [PATCH 2/6] fix(ssh-tunnel): validate server_address format (SSRF defense-in-depth) (#40665) Co-authored-by: Claude Code --- superset/databases/schemas.py | 18 ++++++++++++++++-- tests/unit_tests/databases/schema_tests.py | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/superset/databases/schemas.py b/superset/databases/schemas.py index b399a25ad7cb..540371a9fd17 100644 --- a/superset/databases/schemas.py +++ b/superset/databases/schemas.py @@ -34,7 +34,7 @@ validates, validates_schema, ) -from marshmallow.validate import Length, OneOf, Range, ValidationError +from marshmallow.validate import Length, OneOf, Range, Regexp, ValidationError from sqlalchemy import MetaData from werkzeug.datastructures import FileStorage @@ -449,7 +449,21 @@ class DatabaseSSHTunnel(Schema): id = fields.Integer( allow_none=True, metadata={"description": "SSH Tunnel ID (for updates)"} ) - server_address = fields.String() + # Restrict the SSH tunnel host to a plausible hostname / IP literal. This + # rejects values carrying URL structure, whitespace, or path separators — + # defense in depth against using the tunnel host as an SSRF vector. + server_address = fields.String( + validate=[ + Length(min=1, max=256), + Regexp( + r"^[A-Za-z0-9._:\-\[\]]+$", + error=( + "server_address must be a valid hostname or IP address " + "(letters, digits, and '.', '_', '-', ':', '[', ']' only)" + ), + ), + ] + ) server_port = fields.Integer() username = fields.String() diff --git a/tests/unit_tests/databases/schema_tests.py b/tests/unit_tests/databases/schema_tests.py index 1fd14343a8aa..e808d19cc4fb 100644 --- a/tests/unit_tests/databases/schema_tests.py +++ b/tests/unit_tests/databases/schema_tests.py @@ -378,3 +378,18 @@ def test_import_schema_allows_masked_fields_for_existing_db( } # Should not raise - masked values are allowed for existing DBs schema.load(config) + + +def test_ssh_tunnel_server_address_rejects_non_hostnames() -> None: + """server_address must look like a hostname/IP, not a URL or arbitrary text.""" + from superset.databases.schemas import DatabaseSSHTunnel + + schema = DatabaseSSHTunnel() + base = {"server_port": 22, "username": "u", "private_key": "k"} + + for address in ("10.0.0.5", "ssh.internal.example.com", "[::1]", "bastion_host"): + schema.load({**base, "server_address": address}) + + for bad in ("http://evil/", "1.2.3.4/../x", "a b", "file:///etc/passwd"): + with pytest.raises(ValidationError): + schema.load({**base, "server_address": bad}) From 0b9764aed56256aa25ec0725ec7103146cae225c Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 21:20:11 -0700 Subject: [PATCH 3/6] fix(mcp): honor AUTH_ROLE_ADMIN and warn on permission-less protected tools (#40659) Co-authored-by: Claude Code --- superset/mcp_service/auth.py | 17 ++++++++- .../mcp_service/utils/permissions_utils.py | 19 +++++++--- .../utils/test_permissions_utils.py | 37 ++++++++++++++++++- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/superset/mcp_service/auth.py b/superset/mcp_service/auth.py index 1b851e46da16..e55e065aeae9 100644 --- a/superset/mcp_service/auth.py +++ b/superset/mcp_service/auth.py @@ -65,6 +65,10 @@ CLASS_PERMISSION_ATTR = "_class_permission_name" METHOD_PERMISSION_ATTR = "_method_permission_name" +# Tools already warned about for declaring no class_permission_name, so the +# warning surfaces once per tool instead of on every protected-tool call. +_warned_permissionless_tools: set[str] = set() + class MCPNoAuthSourceError(ValueError): """Raised when no authentication source is available for a request. @@ -138,7 +142,18 @@ def check_tool_permission(func: Callable[..., Any], *, log_denial: bool = True) class_permission_name = getattr(func, CLASS_PERMISSION_ATTR, None) if not class_permission_name: - # No RBAC configured for this tool; allow by default. + # No RBAC configured for this tool; allow by default. This is a + # supported configuration (a protected tool may intentionally + # declare no permission class), but surface it ONCE per tool so an + # accidental omission on a sensitive tool doesn't silently fail open + # — without emitting a WARNING on every protected-tool call. + if func.__name__ not in _warned_permissionless_tools: + _warned_permissionless_tools.add(func.__name__) + logger.warning( + "Tool %s is permission-protected but declares no " + "class_permission_name; allowing access without an RBAC check", + func.__name__, + ) return True method_permission_name = getattr(func, METHOD_PERMISSION_ATTR, "read") diff --git a/superset/mcp_service/utils/permissions_utils.py b/superset/mcp_service/utils/permissions_utils.py index c4b24afc50d0..36977e8866e5 100644 --- a/superset/mcp_service/utils/permissions_utils.py +++ b/superset/mcp_service/utils/permissions_utils.py @@ -89,11 +89,20 @@ def user_has_permission( return False try: - # Check if user is admin (has all permissions) - if hasattr(user, "roles"): - for role in user.roles: - if role.name in ("Admin", "admin"): - return True + from flask import current_app + + # Check if user is admin (has all permissions). Use the configured + # admin role name rather than hardcoding "Admin", so deployments that + # rename the admin role (AUTH_ROLE_ADMIN) still grant admins the bypass. + admin_role_name = current_app.config["AUTH_ROLE_ADMIN"] + # Collect role names granted directly AND inherited via group + # membership (FAB users can receive the admin role through a group), + # so a group-admin still gets the bypass. + role_names = {role.name for role in getattr(user, "roles", None) or []} + for group in getattr(user, "groups", None) or []: + role_names.update(role.name for role in getattr(group, "roles", None) or []) + if admin_role_name in role_names: + return True # Check specific permission from superset import security_manager diff --git a/tests/unit_tests/mcp_service/utils/test_permissions_utils.py b/tests/unit_tests/mcp_service/utils/test_permissions_utils.py index 160fccb6344e..a10748784422 100644 --- a/tests/unit_tests/mcp_service/utils/test_permissions_utils.py +++ b/tests/unit_tests/mcp_service/utils/test_permissions_utils.py @@ -17,15 +17,50 @@ """Tests for MCP field-level permission helpers.""" -from unittest.mock import Mock +from unittest.mock import Mock, patch + +from flask import current_app from superset.mcp_service.utils.permissions_utils import ( apply_field_permissions_to_columns, filter_sensitive_data, get_allowed_fields, + user_has_permission, ) +def test_user_has_permission_admin_uses_configured_role_name(): + """The admin bypass honors AUTH_ROLE_ADMIN, not a hardcoded 'Admin'.""" + + def _role(name: str) -> Mock: + role = Mock() + role.name = name # set after construction (Mock intercepts name=) + return role + + original = current_app.config["AUTH_ROLE_ADMIN"] + current_app.config["AUTH_ROLE_ADMIN"] = "Superuser" + try: + admin = Mock() + admin.roles = [_role("Superuser")] + admin.groups = [] + assert user_has_permission(admin, "can_read", "Chart") is True + + # The previously-hardcoded "Admin" name no longer grants the bypass; + # it falls through to the real permission check. Mock that check to + # deny explicitly (rather than relying on incidental Mock behavior) and + # assert the helper actually consulted security_manager.has_access. + not_admin = Mock() + not_admin.roles = [_role("Admin")] + not_admin.groups = [] + with patch( + "superset.security_manager.has_access", return_value=False + ) as has_access: + assert user_has_permission(not_admin, "can_read", "Chart") is False + has_access.assert_called_once_with("can_read", "Chart", not_admin) + finally: + current_app.config["AUTH_ROLE_ADMIN"] = original + + def test_get_allowed_fields_always_denies_user_directory_fields(): user = Mock() user.roles = [] From 12bef03f4aff4308c8a35d97d600616e8102e08a Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 21:23:48 -0700 Subject: [PATCH 4/6] fix(jinja): apply consistent escaping to url_param values from request args (#40633) Co-authored-by: Claude Code --- superset/jinja_context.py | 12 +++++++----- tests/unit_tests/jinja_context_test.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/superset/jinja_context.py b/superset/jinja_context.py index 2a2e838708fc..1c8b3d1f8592 100644 --- a/superset/jinja_context.py +++ b/superset/jinja_context.py @@ -288,11 +288,13 @@ def url_param( from superset.views.utils import get_form_data if has_request_context() and request.args.get(param): - return request.args.get(param, default) - - form_data, _ = get_form_data() - url_params = form_data.get("url_params") or {} - result = url_params.get(param, default) + result = request.args.get(param, default) + else: + form_data, _ = get_form_data() + url_params = form_data.get("url_params") or {} + result = url_params.get(param, default) + # Escape the value regardless of its source (request args or form + # data); both are interpolated into the rendered SQL. if result and escape_result and self.dialect: # use the dialect specific quoting logic to escape string result = String().literal_processor(dialect=self.dialect)(value=result)[ diff --git a/tests/unit_tests/jinja_context_test.py b/tests/unit_tests/jinja_context_test.py index cfdbc2090d0b..7c9353dd1ad8 100644 --- a/tests/unit_tests/jinja_context_test.py +++ b/tests/unit_tests/jinja_context_test.py @@ -438,6 +438,25 @@ def test_url_param_unescaped_default_form_data() -> None: assert cache.url_param("bar", "O'Malley", escape_result=False) == "O'Malley" +def test_url_param_escaped_request_args() -> None: + """ + Values read from the request query string are escaped the same way as + values from ``form_data`` (both are interpolated into the rendered SQL). + """ + with current_app.test_request_context(query_string={"foo": "O'Brien"}): + cache = ExtraCache(dialect=dialect()) + assert cache.url_param("foo") == "O''Brien" + + +def test_url_param_unescaped_request_args() -> None: + """ + ``escape_result=False`` still opts out of escaping for request-args values. + """ + with current_app.test_request_context(query_string={"foo": "O'Brien"}): + cache = ExtraCache(dialect=dialect()) + assert cache.url_param("foo", escape_result=False) == "O'Brien" + + def test_safe_proxy_primitive() -> None: """ Test the ``safe_proxy`` helper with a function returning a ``str``. From df21fe657186d65d6065fe78c11d94d07bffe63a Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 21:51:32 -0700 Subject: [PATCH 5/6] chore(mcp): return a generic error from the webdriver pool-stats endpoint (#40559) Co-authored-by: Claude Code --- .../mcp_service/screenshot/webdriver_config.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/superset/mcp_service/screenshot/webdriver_config.py b/superset/mcp_service/screenshot/webdriver_config.py index 7c9a017965fb..997e355d7c6d 100644 --- a/superset/mcp_service/screenshot/webdriver_config.py +++ b/superset/mcp_service/screenshot/webdriver_config.py @@ -19,8 +19,11 @@ WebDriver pool configuration defaults for Superset MCP service """ +import logging from typing import Any, Dict +logger = logging.getLogger(__name__) + # Default WebDriver pool configuration DEFAULT_WEBDRIVER_POOL_CONFIG = { # Maximum number of WebDriver instances to keep in the pool @@ -69,9 +72,9 @@ def get_pool_stats_endpoint() -> Any: """ def pool_stats() -> Any: - try: - from flask import jsonify + from flask import jsonify + try: from superset.mcp_service.screenshot.webdriver_pool import ( get_webdriver_pool, ) @@ -80,10 +83,11 @@ def pool_stats() -> Any: stats = pool.get_stats() return jsonify({"webdriver_pool": stats, "status": "healthy"}) - except Exception as e: - from flask import jsonify - - return jsonify({"error": str(e), "status": "error"}), 500 + except Exception: + logger.exception("Failed to retrieve webdriver pool stats") + return jsonify( + {"error": "Failed to retrieve pool stats", "status": "error"} + ), 500 return pool_stats From fa41769a08aa7d51efeacbd01bb89570aca2fdff Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Tue, 2 Jun 2026 22:58:30 -0700 Subject: [PATCH 6/6] fix(embedded): enforce configured allowed domains for postMessage origin (#40629) Co-authored-by: Claude Code --- UPDATING.md | 6 + superset-frontend/src/embedded/index.tsx | 30 +---- .../src/embedded/originValidation.test.ts | 112 ++++++++++++++++++ .../src/embedded/originValidation.ts | 91 ++++++++++++++ superset-frontend/src/types/bootstrapTypes.ts | 3 + superset/embedded/view.py | 4 + tests/integration_tests/embedded/test_view.py | 33 ++++++ 7 files changed, 252 insertions(+), 27 deletions(-) create mode 100644 superset-frontend/src/embedded/originValidation.test.ts create mode 100644 superset-frontend/src/embedded/originValidation.ts diff --git a/UPDATING.md b/UPDATING.md index f5ee4f80c342..e1012be14d1b 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -28,6 +28,12 @@ assists people when migrating to a new version. YDB SQL parsing now relies on the dedicated [`ydb-sqlglot-plugin`](https://pypi.org/project/ydb-sqlglot-plugin/) dialect, which registers itself with sqlglot automatically. YDB users must install this plugin (e.g., via `pip install "apache-superset[ydb]"`) to avoid a `ValueError` when Superset parses YDB queries. +### Embedded dashboards enforce configured Allowed Domains for postMessage + +The embedded dashboard page now validates the origin of incoming `postMessage` events against the dashboard's configured **Allowed Domains**. The server-rendered embedded page exposes the configured domains in its bootstrap payload, and the frontend rejects message events whose origin is not in that list. + +Enforcement only applies when the Allowed Domains list is non-empty. If the list is empty (the default), any origin is accepted, so there is no behavior change for embeds that did not configure Allowed Domains. + ### Dataset import validates catalog against the target connection Importing a dataset now validates the `catalog` field against the target database connection. When the connection has multi-catalog disabled (`allow_multi_catalog` off) and the dataset's catalog is not the connection's default catalog, the import fails instead of silently persisting the non-default catalog. This matches the validation already enforced on the dataset update path and prevents imported datasets from querying an unintended database. diff --git a/superset-frontend/src/embedded/index.tsx b/superset-frontend/src/embedded/index.tsx index 1a4a9eebcc99..b0a09f81645a 100644 --- a/superset-frontend/src/embedded/index.tsx +++ b/superset-frontend/src/embedded/index.tsx @@ -48,6 +48,7 @@ import { } from './EmbeddedContextProviders'; import { embeddedApi } from './api'; import { getDataMaskChangeTrigger } from './utils'; +import { validateMessageEvent } from './originValidation'; setupPlugins(); setupCodeOverrides({ embedded: true }); @@ -125,8 +126,6 @@ const EmbeddedApp = () => ( const appMountPoint = document.getElementById('app')!; -const MESSAGE_TYPE = '__embedded_comms__'; - function showFailureMessage(message: string) { appMountPoint.innerHTML = message; } @@ -139,17 +138,6 @@ if (!window.parent || window.parent === window) { ); } -// if the page is embedded in an origin that hasn't -// been authorized by the curator, we forbid access entirely. -// todo: check the referrer on the route serving this page instead -// const ALLOW_ORIGINS = ['http://127.0.0.1:9001', 'http://localhost:9001']; -// const parentOrigin = new URL(document.referrer).origin; -// if (!ALLOW_ORIGINS.includes(parentOrigin)) { -// throw new Error( -// `[superset] iframe parent ${parentOrigin} is not in the list of allowed origins`, -// ); -// } - let displayedUnauthorizedToast = false; let root: Root | null = null; let started = false; @@ -225,21 +213,9 @@ function setupGuestClient(guestToken: string) { }); } -function validateMessageEvent(event: MessageEvent) { - // if (!ALLOW_ORIGINS.includes(event.origin)) { - // throw new Error('Message origin is not in the allowed list'); - // } - - if (typeof event.data !== 'object' || event.data.type !== MESSAGE_TYPE) { - throw new Error(`Message type does not match type used for embedded comms`); - } -} - window.addEventListener('message', function embeddedPageInitializer(event) { - try { - validateMessageEvent(event); - } catch (err) { - log('ignoring message unrelated to embedded comms', err, event); + if (!validateMessageEvent(event, bootstrapData.embedded?.allowed_domains)) { + log('ignoring message unrelated to embedded comms', event); return; } diff --git a/superset-frontend/src/embedded/originValidation.test.ts b/superset-frontend/src/embedded/originValidation.test.ts new file mode 100644 index 000000000000..2ccdf622f90f --- /dev/null +++ b/superset-frontend/src/embedded/originValidation.test.ts @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { + MESSAGE_TYPE, + isMessageOriginAllowed, + validateMessageEvent, +} from './originValidation'; + +const makeEvent = (origin: string, data: unknown): MessageEvent => + ({ origin, data }) as MessageEvent; + +const validData = { type: MESSAGE_TYPE, handshake: 'port transfer' }; + +test('isMessageOriginAllowed allows any origin when the list is undefined', () => { + expect(isMessageOriginAllowed('https://anywhere.example.com')).toBe(true); +}); + +test('isMessageOriginAllowed allows any origin when the list is empty', () => { + expect(isMessageOriginAllowed('https://anywhere.example.com', [])).toBe(true); +}); + +test('isMessageOriginAllowed allows an origin that is in the list', () => { + expect( + isMessageOriginAllowed('https://allowed.example.com', [ + 'https://allowed.example.com', + ]), + ).toBe(true); +}); + +test('isMessageOriginAllowed matches a listed domain with a trailing slash', () => { + expect( + isMessageOriginAllowed('https://allowed.example.com', [ + 'https://allowed.example.com/', + ]), + ).toBe(true); +}); + +test('isMessageOriginAllowed matches a listed domain that includes a path', () => { + expect( + isMessageOriginAllowed('https://allowed.example.com', [ + 'https://allowed.example.com/embed', + ]), + ).toBe(true); +}); + +test('isMessageOriginAllowed rejects an origin that is not in the list and warns', () => { + const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); + expect( + isMessageOriginAllowed('https://evil.example.com', [ + 'https://allowed.example.com', + ]), + ).toBe(false); + expect(warn).toHaveBeenCalledTimes(1); + warn.mockRestore(); +}); + +test('validateMessageEvent accepts a valid embedded message from any origin when unrestricted', () => { + const event = makeEvent('https://anywhere.example.com', validData); + expect(validateMessageEvent(event)).toBe(true); + expect(validateMessageEvent(event, [])).toBe(true); +}); + +test('validateMessageEvent accepts a valid embedded message from a listed origin', () => { + const event = makeEvent('https://allowed.example.com', validData); + expect(validateMessageEvent(event, ['https://allowed.example.com'])).toBe( + true, + ); +}); + +test('validateMessageEvent rejects a message from an origin not in a non-empty list', () => { + const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); + const event = makeEvent('https://evil.example.com', validData); + expect(validateMessageEvent(event, ['https://allowed.example.com'])).toBe( + false, + ); + warn.mockRestore(); +}); + +test('validateMessageEvent rejects a message whose data type does not match', () => { + const event = makeEvent('https://allowed.example.com', { + type: 'something-else', + }); + expect(validateMessageEvent(event, ['https://allowed.example.com'])).toBe( + false, + ); +}); + +test('validateMessageEvent rejects a message whose data is not an object', () => { + const event = makeEvent('https://allowed.example.com', 'not-an-object'); + expect(validateMessageEvent(event)).toBe(false); +}); + +test('validateMessageEvent rejects a message whose data is null', () => { + const event = makeEvent('https://allowed.example.com', null); + expect(validateMessageEvent(event)).toBe(false); +}); diff --git a/superset-frontend/src/embedded/originValidation.ts b/superset-frontend/src/embedded/originValidation.ts new file mode 100644 index 000000000000..022c692f4225 --- /dev/null +++ b/superset-frontend/src/embedded/originValidation.ts @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// The message type used for the embedded comms handshake. Must stay in sync +// with the parent SDK's handshake messages. +export const MESSAGE_TYPE = '__embedded_comms__'; + +/** + * Normalizes an allowed-domain entry to its origin (`scheme://host[:port]`), + * matching the comparison the backend performs via `flask_wtf.csrf.same_origin`. + * + * A browser-provided `event.origin` never carries a path or trailing slash, but + * configured allowed domains may (e.g. `https://example.com/` or + * `https://app.example.com/embed`). Reducing each entry to its origin keeps the + * frontend and backend in agreement about what an "allowed domain" is. Entries + * that cannot be parsed as a URL fall back to the raw value so an exact match is + * still possible. + */ +function normalizeToOrigin(domain: string): string { + try { + return new URL(domain).origin; + } catch { + return domain; + } +} + +/** + * Validates the origin of an incoming postMessage event against the dashboard's + * configured allowed domains. + * + * Enforcement is opt-in by configuration: if the allowed-domains list is empty + * or undefined, any origin is accepted (no restriction), which preserves the + * historical behavior for embeds that did not configure domains. When the list + * is non-empty, only origins present in the list are accepted. + */ +export function isMessageOriginAllowed( + origin: string, + allowedDomains?: string[], +): boolean { + if (!allowedDomains || allowedDomains.length === 0) { + return true; + } + if (allowedDomains.some(domain => normalizeToOrigin(domain) === origin)) { + return true; + } + // eslint-disable-next-line no-console + console.warn( + `[superset] ignoring embedded message from origin "${origin}" which is not in the list of allowed domains`, + ); + return false; +} + +/** + * Validates that an incoming message is intended for embedded comms and that it + * originates from an allowed domain. Returns `true` when the message should be + * processed, `false` otherwise. + */ +export function validateMessageEvent( + event: MessageEvent, + allowedDomains?: string[], +): boolean { + if (!isMessageOriginAllowed(event.origin, allowedDomains)) { + return false; + } + + if ( + typeof event.data !== 'object' || + event.data === null || + event.data.type !== MESSAGE_TYPE + ) { + return false; + } + + return true; +} diff --git a/superset-frontend/src/types/bootstrapTypes.ts b/superset-frontend/src/types/bootstrapTypes.ts index 610767c389a8..4b8e646fbbf1 100644 --- a/superset-frontend/src/types/bootstrapTypes.ts +++ b/superset-frontend/src/types/bootstrapTypes.ts @@ -178,6 +178,9 @@ export interface BootstrapData { config?: any; embedded?: { dashboard_id: string; + // Domains allowed to embed this dashboard. An empty/undefined list means + // any domain is allowed (no restriction). + allowed_domains?: string[]; }; requested_query?: JsonObject; } diff --git a/superset/embedded/view.py b/superset/embedded/view.py index 6e5419286d38..a48b086336bd 100644 --- a/superset/embedded/view.py +++ b/superset/embedded/view.py @@ -83,6 +83,10 @@ def embedded( "common": common_bootstrap_payload(), "embedded": { "dashboard_id": embedded.dashboard_id, + # The list of domains allowed to embed this dashboard. An empty + # list means any domain is allowed (no restriction). The frontend + # uses this to validate the origin of incoming postMessage events. + "allowed_domains": embedded.allowed_domains, }, } diff --git a/tests/integration_tests/embedded/test_view.py b/tests/integration_tests/embedded/test_view.py index f4d5ae692556..800be6de9a4a 100644 --- a/tests/integration_tests/embedded/test_view.py +++ b/tests/integration_tests/embedded/test_view.py @@ -16,6 +16,8 @@ # under the License. from __future__ import annotations +import html +import re from typing import TYPE_CHECKING from unittest import mock @@ -24,6 +26,7 @@ from superset import db from superset.daos.dashboard import EmbeddedDashboardDAO from superset.models.dashboard import Dashboard +from superset.utils import json from tests.integration_tests.fixtures.birth_names_dashboard import ( load_birth_names_dashboard_with_slices, # noqa: F401 load_birth_names_data, # noqa: F401 @@ -36,6 +39,14 @@ from flask.testing import FlaskClient +def _extract_bootstrap_data(response_data: bytes) -> dict[str, Any]: + """Parse the JSON bootstrap payload embedded in the SPA template.""" + html_body = response_data.decode("utf-8") + match = re.search(r'data-bootstrap="([^"]*)"', html_body) + assert match is not None, "bootstrap payload not found in response" + return json.loads(html.unescape(match.group(1))) + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") @mock.patch.dict( "superset.extensions.feature_flag_manager._feature_flags", @@ -48,6 +59,28 @@ def test_get_embedded_dashboard(client: FlaskClient[Any]): # noqa: F811 uri = f"embedded/{embedded.uuid}" response = client.get(uri) assert response.status_code == 200 + # The bootstrap payload exposes the (empty) allowed-domains list so the + # frontend can validate postMessage origins. + bootstrap = _extract_bootstrap_data(response.data) + assert bootstrap["embedded"]["allowed_domains"] == [] + + +@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") +@mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + EMBEDDED_SUPERSET=True, +) +def test_get_embedded_dashboard_bootstrap_includes_allowed_domains( + client: FlaskClient[Any], # noqa: F811 +): + dash = db.session.query(Dashboard).filter_by(slug="births").first() + embedded = EmbeddedDashboardDAO.upsert(dash, ["https://allowed.example.com"]) + db.session.flush() + uri = f"embedded/{embedded.uuid}" + response = client.get(uri, headers={"Referer": "https://allowed.example.com"}) + assert response.status_code == 200 + bootstrap = _extract_bootstrap_data(response.data) + assert bootstrap["embedded"]["allowed_domains"] == ["https://allowed.example.com"] @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")