From b9dc9d722e62d2417076272e29c08280ac6c51d2 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Wed, 3 Jun 2026 00:14:48 -0700 Subject: [PATCH 01/13] fix(export): sanitize user-supplied CSV export filename (charts + SQL Lab) (#40632) Co-authored-by: Claude Code --- superset/charts/data/api.py | 7 ++ superset/sqllab/api.py | 8 +++ .../unit_tests/charts/test_chart_data_api.py | 31 +++++++++ tests/unit_tests/sqllab/__init__.py | 16 +++++ tests/unit_tests/sqllab/api_test.py | 64 +++++++++++++++++++ 5 files changed, 126 insertions(+) create mode 100644 tests/unit_tests/sqllab/__init__.py create mode 100644 tests/unit_tests/sqllab/api_test.py diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index 3b6fd48b4fef..0be87a4f8b37 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -616,6 +616,13 @@ def _get_data_response( def _extract_export_params_from_request(self) -> tuple[str | None, int | None]: """Extract filename and expected_rows from request for streaming exports.""" filename = request.form.get("filename") + if filename: + # Sanitize the user-supplied filename before it is used in the + # Content-Disposition header (consistent with the generated-name + # path). secure_filename may reduce a name consisting entirely of + # unsupported characters to an empty string, in which case fall back + # to the generated default downstream. + filename = secure_filename(filename) or None if filename: logger.info("FRONTEND PROVIDED FILENAME: %s", filename) diff --git a/superset/sqllab/api.py b/superset/sqllab/api.py index 3229773eaf7e..5b6d585f9c79 100644 --- a/superset/sqllab/api.py +++ b/superset/sqllab/api.py @@ -419,6 +419,14 @@ def _create_streaming_csv_response( command = StreamingSqlResultExportCommand(client_id, chunk_size) command.validate() + if filename: + # Sanitize the user-supplied filename before it is used in the + # Content-Disposition header (consistent with the generated-name + # path below). secure_filename may reduce a name consisting entirely + # of unsafe characters to an empty string, in which case we fall + # back to the generated default. + filename = secure_filename(filename) or None + if not filename: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = secure_filename(f"sqllab_{client_id}_{timestamp}.csv") diff --git a/tests/unit_tests/charts/test_chart_data_api.py b/tests/unit_tests/charts/test_chart_data_api.py index 51218258c816..15b987c5e09b 100644 --- a/tests/unit_tests/charts/test_chart_data_api.py +++ b/tests/unit_tests/charts/test_chart_data_api.py @@ -16,6 +16,8 @@ # under the License. from __future__ import annotations +from unittest.mock import MagicMock + from flask import Flask, g from superset.utils import json @@ -69,3 +71,32 @@ def test_get_data_sets_g_form_data_without_dashboard_filter() -> None: assert hasattr(g, "form_data") assert g.form_data["datasource"] == {"id": 42, "type": "table"} assert g.form_data["queries"][0]["columns"] == ["col1"] + + +def _extract_filename(form_value: str) -> str | None: + """Run _extract_export_params_from_request with a form filename value.""" + from superset.charts.data.api import ChartDataRestApi + + app = Flask(__name__) + with app.test_request_context("/", method="POST", data={"filename": form_value}): + filename, _ = ChartDataRestApi._extract_export_params_from_request(MagicMock()) + return filename + + +def test_extract_export_filename_sanitizes_special_characters() -> None: + """A malicious/path-y filename is sanitized before header/disk use.""" + filename = _extract_filename('../../etc/pa"ss\r\nSet-Cookie: x') + + assert filename is not None + for bad in ("/", "\\", '"', "\r", "\n", ".."): + assert bad not in filename + + +def test_extract_export_filename_preserves_normal_name() -> None: + """A normal filename passes through unchanged.""" + assert _extract_filename("my_export.csv") == "my_export.csv" + + +def test_extract_export_filename_all_special_falls_back_to_none() -> None: + """A name with no usable characters becomes None (generated downstream).""" + assert _extract_filename("***") is None diff --git a/tests/unit_tests/sqllab/__init__.py b/tests/unit_tests/sqllab/__init__.py new file mode 100644 index 000000000000..13a83393a912 --- /dev/null +++ b/tests/unit_tests/sqllab/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/unit_tests/sqllab/api_test.py b/tests/unit_tests/sqllab/api_test.py new file mode 100644 index 000000000000..384e8cb5ddfc --- /dev/null +++ b/tests/unit_tests/sqllab/api_test.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import re +from unittest.mock import MagicMock, patch + +from flask import Flask + + +def _disposition_filename(form_filename: str | None) -> str: + """Return the filename rendered into a streaming CSV Content-Disposition.""" + from superset.sqllab.api import SqlLabRestApi + + app = Flask(__name__) + app.config["CSV_EXPORT"] = {"encoding": "utf-8"} + with ( + app.app_context(), + patch("superset.sqllab.api.StreamingSqlResultExportCommand") as command_cls, + ): + command = command_cls.return_value + command.run.return_value = lambda: iter([b""]) + response = SqlLabRestApi._create_streaming_csv_response( + MagicMock(), client_id="abc123", filename=form_filename + ) + disposition = response.headers["Content-Disposition"] + match = re.search(r'filename="([^"]*)"', disposition) + assert match is not None, disposition + return match.group(1) + + +def test_streaming_csv_sanitizes_user_filename() -> None: + """A path-y / header-injecting filename is sanitized before the header.""" + filename = _disposition_filename('../../etc/pa"ss\r\nSet-Cookie: x.csv') + + for bad in ("/", "\\", '"', "\r", "\n", ".."): + assert bad not in filename + + +def test_streaming_csv_preserves_normal_filename() -> None: + """A normal filename passes through unchanged.""" + assert _disposition_filename("my_results.csv") == "my_results.csv" + + +def test_streaming_csv_falls_back_when_filename_empty() -> None: + """An all-unsafe filename collapses to the generated default, not empty.""" + filename = _disposition_filename("///") + + assert filename.startswith("sqllab_abc123_") + assert filename.endswith(".csv") From 8e4a460cc7142de6fe62ddfd2f5ab274ed5306d5 Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:52:22 +0100 Subject: [PATCH 02/13] fix(charts): apply DISALLOWED_SQL_FUNCTIONS gate to adhoc expressions (#40568) Co-authored-by: Claude Opus 4.7 --- superset/models/helpers.py | 47 ++++++++ tests/unit_tests/models/helpers_test.py | 144 ++++++++++++++++++++++++ 2 files changed, 191 insertions(+) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index e2ff6407879b..26bc15ecca8d 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -90,6 +90,8 @@ InvalidPostProcessingError, QueryClauseValidationException, QueryObjectValidationError, + SupersetDisallowedSQLFunctionException, + SupersetDisallowedSQLTableException, SupersetErrorException, SupersetErrorsException, SupersetSecurityException, @@ -1192,6 +1194,51 @@ def _process_sql_expression( # pylint: disable=too-many-arguments expression = sanitize_clause(expression, engine) except QueryClauseValidationException as ex: raise QueryObjectValidationError(ex.message) from ex + # Adhoc expressions are user-controlled SQL that ends up inside a + # `literal_column(...)`. Apply the operator-configured + # `DISALLOWED_SQL_FUNCTIONS` / `DISALLOWED_SQL_TABLES` gates at the + # validation step so a dangerous function call (e.g. `version()`, + # `pg_read_file(...)`, `query_to_xml(...)`) is rejected before the + # expression is incorporated into the final SQL. This complements + # the same gate applied at query-execution time and gives the + # adhoc-expression path defense in depth. + disallowed_functions = app.config["DISALLOWED_SQL_FUNCTIONS"].get( + engine, set() + ) + disallowed_tables = app.config["DISALLOWED_SQL_TABLES"].get(engine, set()) + if disallowed_functions or disallowed_tables: + # `_process_select_expression` (and siblings) pre-wraps the + # input with `SELECT ...`; other callers pass bare + # expressions. Detect and don't double-wrap, otherwise + # `SELECT SELECT ...` fails the sqlglot parse. + sql_to_check = ( + expression + if expression.strip().upper().startswith("SELECT") + else f"SELECT {expression}" + ) + parsed = SQLScript(sql_to_check, engine=engine) + if disallowed_functions and parsed.check_functions_present( + disallowed_functions + ): + raise SupersetDisallowedSQLFunctionException(disallowed_functions) + if disallowed_tables and parsed.check_tables_present(disallowed_tables): + # Report only the tables actually found in the expression, + # mirroring the canonical execution-time gate in + # `superset.sql_lab._validate_query` so the user-facing + # error doesn't echo the operator's full denylist. + present_tables = { + table.table.lower() + for statement in parsed.statements + for table in statement.tables + } + found_tables = { + table + for table in disallowed_tables + if table.lower() in present_tables + } + raise SupersetDisallowedSQLTableException( + found_tables or disallowed_tables + ) return expression def _process_select_expression( diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py index b140aea7bd92..c93f6121dbd5 100644 --- a/tests/unit_tests/models/helpers_test.py +++ b/tests/unit_tests/models/helpers_test.py @@ -32,6 +32,7 @@ from superset.superset_typing import AdhocColumn from superset.utils.core import GenericDataType +from tests.unit_tests.conftest import with_feature_flags if TYPE_CHECKING: from superset.jinja_context import BaseTemplateProcessor @@ -2703,3 +2704,146 @@ def test_format_time_humanized_skips_activation_for_english( instance._format_time_humanized(datetime.now() - timedelta(hours=2)) mock_activate.assert_not_called() + + +# ----------------------------------------------------------------------------- +# _process_sql_expression denylist gate (adhoc expression validation) +# ----------------------------------------------------------------------------- + + +def _patch_disallowed( + mocker: MockerFixture, + functions: dict[str, set[str]] | None = None, + tables: dict[str, set[str]] | None = None, +) -> None: + """Inject the engine-keyed denylists into the live Flask app config that + `_process_sql_expression` consults via `app.config[...]`.""" + mocker.patch.dict( + "flask.current_app.config", + { + "DISALLOWED_SQL_FUNCTIONS": functions or {}, + "DISALLOWED_SQL_TABLES": tables or {}, + }, + clear=False, + ) + + +def test_process_sql_expression_rejects_disallowed_function( + mocker: MockerFixture, database: Database +) -> None: + """Adhoc expressions are user-controlled SQL incorporated into the final + query via `literal_column(...)`. A function name on the operator's + DISALLOWED_SQL_FUNCTIONS list must be rejected at validation time, + before the rendered SQL is handed to the database.""" + from superset.connectors.sqla.models import SqlaTable + from superset.exceptions import SupersetDisallowedSQLFunctionException + + _patch_disallowed(mocker, functions={"postgresql": {"version"}}) + table = SqlaTable(database=database, schema=None, table_name="t") + with pytest.raises(SupersetDisallowedSQLFunctionException): + table._process_sql_expression( + expression="version()", + database_id=database.id, + engine="postgresql", + schema="", + template_processor=None, + ) + + +def test_process_sql_expression_rejects_disallowed_function_in_aggregate( + mocker: MockerFixture, database: Database +) -> None: + """A denylisted function wrapped in a legitimate aggregate + (`MAX(version())`) must still be rejected: the wrapper is the obvious + bypass attempt.""" + from superset.connectors.sqla.models import SqlaTable + from superset.exceptions import SupersetDisallowedSQLFunctionException + + _patch_disallowed(mocker, functions={"postgresql": {"version"}}) + table = SqlaTable(database=database, schema=None, table_name="t") + with pytest.raises(SupersetDisallowedSQLFunctionException): + table._process_sql_expression( + expression="MAX(version())", + database_id=database.id, + engine="postgresql", + schema="", + template_processor=None, + ) + + +@with_feature_flags(ALLOW_ADHOC_SUBQUERY=True) +def test_process_sql_expression_rejects_disallowed_table( + mocker: MockerFixture, database: Database +) -> None: + """Adhoc subqueries that reference a denylisted table must be rejected, + and the raised exception must carry only the tables actually found in + the expression (matching the canonical execution-time gate in + `superset.sql_lab._validate_query`). The branch is only reachable when + `ALLOW_ADHOC_SUBQUERY=True`; otherwise `validate_adhoc_subquery` rejects + the subquery first.""" + from superset.connectors.sqla.models import SqlaTable + from superset.exceptions import SupersetDisallowedSQLTableException + + _patch_disallowed( + mocker, tables={"postgresql": {"pg_authid", "pg_shadow", "pg_stat_activity"}} + ) + table = SqlaTable(database=database, schema=None, table_name="t") + with pytest.raises(SupersetDisallowedSQLTableException) as exc_info: + table._process_sql_expression( + expression="(SELECT id FROM pg_authid)", + database_id=database.id, + engine="postgresql", + schema="", + template_processor=None, + ) + # Assert on substring (set repr ordering): only the offending table is + # echoed back to the user, not the full operator denylist. + message = exc_info.value.error.message + assert "pg_authid" in message + assert "pg_shadow" not in message + assert "pg_stat_activity" not in message + + +def test_process_sql_expression_allows_benign_expression( + mocker: MockerFixture, database: Database +) -> None: + """Negative control: a benign aggregate over a regular column must pass + even when denylists are configured.""" + from superset.connectors.sqla.models import SqlaTable + + _patch_disallowed( + mocker, + functions={"postgresql": {"version"}}, + tables={"postgresql": {"pg_authid"}}, + ) + table = SqlaTable(database=database, schema=None, table_name="t") + result = table._process_sql_expression( + expression="SUM(amount)", + database_id=database.id, + engine="postgresql", + schema="", + template_processor=None, + ) + assert result is not None + assert "SUM" in result.upper() + + +def test_process_sql_expression_no_gate_when_denylists_empty( + mocker: MockerFixture, database: Database +) -> None: + """When neither DISALLOWED_SQL_FUNCTIONS nor DISALLOWED_SQL_TABLES has an + entry for the engine, the new gate must not run an extra parse: any + SQL that passes the pre-existing `sanitize_clause` validation is + accepted.""" + from superset.connectors.sqla.models import SqlaTable + + _patch_disallowed(mocker, functions={}, tables={}) + table = SqlaTable(database=database, schema=None, table_name="t") + result = table._process_sql_expression( + expression="version()", + database_id=database.id, + engine="postgresql", + schema="", + template_processor=None, + ) + assert result is not None From faa76f6741241db6ffa4ba983ef13b0a27adec0b Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:09 +0100 Subject: [PATCH 03/13] fix(embedding): add optional dataset allowlist to guest tokens (#39302) Co-authored-by: Claude Sonnet 4.6 --- superset-frontend/.eslintrc.js | 2 +- .../transformProps.test.ts | 4 +- .../src/components/Echart.tsx | 7 +- .../controls/CollectionControl/index.tsx | 4 +- superset/security/api.py | 17 +- superset/security/guest_token.py | 15 +- superset/security/manager.py | 27 +- .../test_guest_token_dataset_allowlist.py | 273 ++++++++++++++++++ 8 files changed, 338 insertions(+), 11 deletions(-) create mode 100644 tests/unit_tests/security/test_guest_token_dataset_allowlist.py diff --git a/superset-frontend/.eslintrc.js b/superset-frontend/.eslintrc.js index 002d3f392521..d7b72a6d93ef 100644 --- a/superset-frontend/.eslintrc.js +++ b/superset-frontend/.eslintrc.js @@ -80,7 +80,7 @@ const restrictedImportsRules = { 'no-jest-mock-console': { name: 'jest-mock-console', message: 'Please use native Jest spies, i.e. jest.spyOn(console, "warn")', - } + }, }; module.exports = { diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/BigNumber/BigNumberWithTrendline/transformProps.test.ts b/superset-frontend/plugins/plugin-chart-echarts/src/BigNumber/BigNumberWithTrendline/transformProps.test.ts index 6ce84f5ffe4d..6a9752d57b9a 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/BigNumber/BigNumberWithTrendline/transformProps.test.ts +++ b/superset-frontend/plugins/plugin-chart-echarts/src/BigNumber/BigNumberWithTrendline/transformProps.test.ts @@ -288,9 +288,7 @@ describe('BigNumberWithTrendline transformProps', () => { height: 300, queriesData: [ { - data: [ - { __timestamp: 1, value: 100 }, - ] as unknown as BigNumberDatum[], + data: [{ __timestamp: 1, value: 100 }] as unknown as BigNumberDatum[], colnames: ['__timestamp', 'value'], coltypes: ['TEMPORAL', 'NUMERIC'], }, diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/components/Echart.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/components/Echart.tsx index 97b55f2a9a76..618cb62142c3 100644 --- a/superset-frontend/plugins/plugin-chart-echarts/src/components/Echart.tsx +++ b/superset-frontend/plugins/plugin-chart-echarts/src/components/Echart.tsx @@ -284,8 +284,11 @@ function Echart( // setOption(notMerge:true) replaces the dataZoom config, dropping any // range the user has engaged. Preserve it across the call. const previousZoom = notMerge - ? (chartRef.current?.getOption() as { dataZoom?: DataZoomComponentOption[] }) - ?.dataZoom + ? ( + chartRef.current?.getOption() as { + dataZoom?: DataZoomComponentOption[]; + } + )?.dataZoom : undefined; chartRef.current?.setOption(themedEchartOptions, { notMerge, diff --git a/superset-frontend/src/explore/components/controls/CollectionControl/index.tsx b/superset-frontend/src/explore/components/controls/CollectionControl/index.tsx index 2b0e01b01292..1a0393577ad0 100644 --- a/superset-frontend/src/explore/components/controls/CollectionControl/index.tsx +++ b/superset-frontend/src/explore/components/controls/CollectionControl/index.tsx @@ -188,7 +188,9 @@ function CollectionControl({ // Two items can collide when keyAccessor returns falsy and the index // fallback is used — breaking dnd-kit reordering and React reconciliation. // Assign a stable nanoid per item ref when no key is available. - const generatedIdsRef = useRef>(new WeakMap()); + const generatedIdsRef = useRef>( + new WeakMap(), + ); const itemIds = useMemo( () => value.map(item => { diff --git a/superset/security/api.py b/superset/security/api.py index 9c8d035760e2..daff4d980263 100644 --- a/superset/security/api.py +++ b/superset/security/api.py @@ -83,6 +83,18 @@ class GuestTokenCreateSchema(PermissiveSchema): user = fields.Nested(UserSchema) resources = fields.List(fields.Nested(ResourceSchema), required=True) rls = fields.List(fields.Nested(RlsRuleSchema), required=True) + datasets = fields.List( + fields.Integer(), + load_default=None, + allow_none=True, + metadata={ + "description": ( + "Optional allowlist of dataset IDs the guest may access. " + "When omitted all datasets linked to the embedded dashboard " + "are accessible, preserving the default behaviour." + ) + }, + ) class RoleResponseSchema(PermissiveSchema): @@ -187,7 +199,10 @@ def guest_token(self) -> Response: # make sure username doesn't reference an existing user # check rls rules for validity? token = self.appbuilder.sm.create_guest_access_token( - body["user"], body["resources"], body["rls"] + body.get("user", {}), + body["resources"], + body["rls"], + **({"datasets": body["datasets"]} if "datasets" in body else {}), ) return self.response(200, token=token) except EmbeddedDashboardNotFoundError as error: diff --git a/superset/security/guest_token.py b/superset/security/guest_token.py index e0403e07e104..4f20406f2f28 100644 --- a/superset/security/guest_token.py +++ b/superset/security/guest_token.py @@ -45,7 +45,9 @@ class GuestTokenRlsRule(TypedDict): clause: str -class GuestToken(TypedDict): +class _GuestTokenRequired(TypedDict): + """Required JWT claims for a guest token payload.""" + iat: float exp: float user: GuestTokenUser @@ -53,6 +55,17 @@ class GuestToken(TypedDict): rls_rules: list[GuestTokenRlsRule] +class GuestToken(_GuestTokenRequired, total=False): + """JWT claims for an embedded guest token. + + ``datasets`` is an optional allowlist of dataset IDs the guest may access. + When absent the guest can access all datasets linked to the embedded dashboard, + preserving existing behaviour. When present only the listed IDs are permitted. + """ + + datasets: list[int] + + class GuestUser(AnonymousUserMixin): """ Used as the "anonymous" user in case of guest authentication (embedded) diff --git a/superset/security/manager.py b/superset/security/manager.py index 737b172cebd6..0b82d325b196 100644 --- a/superset/security/manager.py +++ b/superset/security/manager.py @@ -3208,6 +3208,23 @@ def raise_for_access( # noqa: C901 self.get_datasource_access_error_object(datasource) ) + # When the guest token carries a dataset allowlist, restrict access + # to only those dataset IDs even if the chart/dashboard check above + # would otherwise grant it. Tokens without the ``datasets`` claim + # retain the existing behaviour (all dashboard datasets accessible). + if guest_user := self.get_current_guest_user_if_guest(): + allowed_datasets: Optional[list[int]] = guest_user.guest_token.get( + "datasets" + ) + if allowed_datasets is not None and ( + not isinstance(allowed_datasets, list) + or not all(isinstance(d, int) for d in allowed_datasets) + or datasource.id not in allowed_datasets + ): + raise SupersetSecurityException( + self.get_datasource_access_error_object(datasource) + ) + if dashboard: if self.is_guest_user(): # Guest user is currently used for embedded dashboards only. If the guest # noqa: E501 @@ -3537,6 +3554,7 @@ def create_guest_access_token( user: GuestTokenUser, resources: GuestTokenResources, rls: list[GuestTokenRlsRule], + datasets: Optional[list[int]] = None, ) -> bytes: secret = get_conf()["GUEST_TOKEN_JWT_SECRET"] algo = get_conf()["GUEST_TOKEN_JWT_ALGO"] @@ -3545,7 +3563,7 @@ def create_guest_access_token( # calculate expiration time now = self._get_current_epoch_time() exp = now + exp_seconds - claims = { + claims: dict[str, Any] = { "user": user, "resources": resources, "rls_rules": rls, @@ -3555,6 +3573,8 @@ def create_guest_access_token( "aud": audience, "type": "guest", } + if datasets is not None: + claims["datasets"] = datasets return self.pyjwt_for_guest_token.encode(claims, secret, algorithm=algo) def get_guest_user_from_request(self, req: Request) -> Optional[GuestUser]: @@ -3625,7 +3645,10 @@ def is_guest_user(user: Optional[Any] = None) -> bool: return hasattr(user, "is_guest_user") and user.is_guest_user def get_current_guest_user_if_guest(self) -> Optional[GuestUser]: - return g.user if self.is_guest_user() else None + user = getattr(g, "user", None) + if isinstance(user, GuestUser): + return user + return None def has_guest_access(self, dashboard: "Dashboard") -> bool: user = self.get_current_guest_user_if_guest() diff --git a/tests/unit_tests/security/test_guest_token_dataset_allowlist.py b/tests/unit_tests/security/test_guest_token_dataset_allowlist.py new file mode 100644 index 000000000000..1032eea69194 --- /dev/null +++ b/tests/unit_tests/security/test_guest_token_dataset_allowlist.py @@ -0,0 +1,273 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for the optional dataset allowlist in guest tokens. + +Covers token creation (JWT claims), schema deserialization, and the access +enforcement gate inside raise_for_access(). +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from superset.exceptions import SupersetSecurityException +from superset.security.guest_token import ( + GuestToken, + GuestTokenResourceType, + GuestUser, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_guest_user(datasets: list[int] | None = None) -> GuestUser: + """Build a GuestUser whose token optionally carries a datasets allowlist.""" + token: GuestToken = { + "user": {}, + "resources": [{"type": GuestTokenResourceType.DASHBOARD, "id": "dash-uuid"}], + "rls_rules": [], + "iat": 0, + "exp": 9999999999, + } + if datasets is not None: + token["datasets"] = datasets + return GuestUser(token=token, roles=[]) + + +def _make_datasource(dataset_id: int) -> MagicMock: + """Return a minimal datasource mock with a numeric id.""" + ds = MagicMock() + ds.id = dataset_id + ds.perm = "datasource_access" + return ds + + +# --------------------------------------------------------------------------- +# create_guest_access_token — JWT claims +# --------------------------------------------------------------------------- + + +def test_create_guest_access_token_without_datasets_omits_claim() -> None: + """When datasets=None the JWT must not contain a datasets key.""" + from superset.security.manager import SupersetSecurityManager + + sm = MagicMock(spec=SupersetSecurityManager) + sm._get_current_epoch_time.return_value = 0 + sm._get_guest_token_jwt_audience.return_value = "superset" + sm.pyjwt_for_guest_token = MagicMock() + + with patch( + "superset.security.manager.get_conf", + return_value={ + "GUEST_TOKEN_JWT_SECRET": "secret", + "GUEST_TOKEN_JWT_ALGO": "HS256", + "GUEST_TOKEN_JWT_EXP_SECONDS": 300, + }, + ): + SupersetSecurityManager.create_guest_access_token( + sm, user={}, resources=[], rls=[], datasets=None + ) + + claims = sm.pyjwt_for_guest_token.encode.call_args[0][0] + assert "datasets" not in claims + + +def test_create_guest_access_token_with_datasets_includes_claim() -> None: + """When datasets is provided the JWT must include the datasets claim.""" + from superset.security.manager import SupersetSecurityManager + + sm = MagicMock(spec=SupersetSecurityManager) + sm._get_current_epoch_time.return_value = 0 + sm._get_guest_token_jwt_audience.return_value = "superset" + sm.pyjwt_for_guest_token = MagicMock() + + with patch( + "superset.security.manager.get_conf", + return_value={ + "GUEST_TOKEN_JWT_SECRET": "secret", + "GUEST_TOKEN_JWT_ALGO": "HS256", + "GUEST_TOKEN_JWT_EXP_SECONDS": 300, + }, + ): + SupersetSecurityManager.create_guest_access_token( + sm, user={}, resources=[], rls=[], datasets=[7, 8] + ) + + claims = sm.pyjwt_for_guest_token.encode.call_args[0][0] + assert claims["datasets"] == [7, 8] + + +# --------------------------------------------------------------------------- +# raise_for_access — dataset allowlist enforcement +# --------------------------------------------------------------------------- + + +def _sm_for_access_test(guest_user: GuestUser) -> MagicMock: + """Build a security-manager mock wired to grant basic datasource access + through the guest path so only the allowlist gate is exercised.""" + from superset.security.manager import SupersetSecurityManager + + sm = MagicMock(spec=SupersetSecurityManager) + sm.is_guest_user.return_value = True + sm.get_current_guest_user_if_guest.return_value = guest_user + # Make every upstream access check pass so we isolate the allowlist check. + sm.can_access_schema.return_value = True + return sm + + +def test_raise_for_access_no_datasets_claim_allows_any_datasource() -> None: + """A token without a datasets claim must allow all datasources (backward compat).""" + from superset.security.manager import SupersetSecurityManager + + guest_user = _make_guest_user(datasets=None) + sm = _sm_for_access_test(guest_user) + datasource = _make_datasource(dataset_id=99) + + # can_access_schema returns True so the main block does not raise, + # then we hit our allowlist check — with no claim it must not raise either. + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) # no exception + + +def test_raise_for_access_datasets_claim_allows_listed_datasource() -> None: + """A token with datasets=[7, 8] must allow datasource id=7.""" + from superset.security.manager import SupersetSecurityManager + + guest_user = _make_guest_user(datasets=[7, 8]) + sm = _sm_for_access_test(guest_user) + datasource = _make_datasource(dataset_id=7) + + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) # no exception + + +def test_raise_for_access_datasets_claim_blocks_unlisted_datasource() -> None: + """A token with datasets=[7, 8] must block datasource id=99.""" + from superset.security.manager import SupersetSecurityManager + + guest_user = _make_guest_user(datasets=[7, 8]) + sm = _sm_for_access_test(guest_user) + datasource = _make_datasource(dataset_id=99) + + with pytest.raises(SupersetSecurityException): + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) + + +def test_raise_for_access_empty_datasets_list_blocks_all() -> None: + """An explicit empty allowlist (datasets=[]) must block every datasource.""" + from superset.security.manager import SupersetSecurityManager + + guest_user = _make_guest_user(datasets=[]) + sm = _sm_for_access_test(guest_user) + datasource = _make_datasource(dataset_id=7) + + with pytest.raises(SupersetSecurityException): + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) + + +def test_raise_for_access_malformed_datasets_claim_blocks_access() -> None: + """A non-integer element in the datasets claim must be treated as a denial.""" + from superset.security.manager import SupersetSecurityManager + + # Simulate a token whose datasets claim was tampered to contain strings. + guest_user = _make_guest_user(datasets=None) + guest_user.guest_token["datasets"] = ["7", "8"] # type: ignore[list-item] + sm = _sm_for_access_test(guest_user) + datasource = _make_datasource(dataset_id=7) + + with pytest.raises(SupersetSecurityException): + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) + + +# --------------------------------------------------------------------------- +# GuestTokenCreateSchema — datasets field +# --------------------------------------------------------------------------- + + +def test_guest_token_create_schema_datasets_optional() -> None: + """datasets is optional — a payload without it must load successfully.""" + from superset.security.api import GuestTokenCreateSchema + + schema = GuestTokenCreateSchema() + result = schema.load({"resources": [{"type": "dashboard", "id": "abc"}], "rls": []}) + assert result.get("datasets") is None + + +def test_guest_token_create_schema_datasets_accepted() -> None: + """datasets=[7, 8] must load and be present in the result.""" + from superset.security.api import GuestTokenCreateSchema + + schema = GuestTokenCreateSchema() + result = schema.load( + { + "resources": [{"type": "dashboard", "id": "abc"}], + "rls": [], + "datasets": [7, 8], + } + ) + assert result["datasets"] == [7, 8] + + +# --------------------------------------------------------------------------- +# get_current_guest_user_if_guest — direct implementation coverage +# --------------------------------------------------------------------------- + + +def test_get_current_guest_user_if_guest_returns_guest_user() -> None: + """Returns the GuestUser when g.user is a GuestUser instance.""" + from unittest.mock import patch + + from superset.security.manager import SupersetSecurityManager + + guest_user = _make_guest_user() + sm = MagicMock(spec=SupersetSecurityManager) + + with patch("superset.security.manager.g") as mock_g: + mock_g.user = guest_user + result = SupersetSecurityManager.get_current_guest_user_if_guest(sm) + + assert result is guest_user + + +def test_get_current_guest_user_if_guest_returns_none_for_regular_user() -> None: + """Returns None when g.user is a regular (non-guest) user.""" + from unittest.mock import patch + + from superset.security.manager import SupersetSecurityManager + + regular_user = MagicMock() # not a GuestUser instance + sm = MagicMock(spec=SupersetSecurityManager) + + with patch("superset.security.manager.g") as mock_g: + mock_g.user = regular_user + result = SupersetSecurityManager.get_current_guest_user_if_guest(sm) + + assert result is None + + +def test_raise_for_access_non_guest_skips_allowlist_check() -> None: + """Allowlist check is skipped when get_current_guest_user_if_guest returns None.""" + from superset.security.manager import SupersetSecurityManager + + sm = MagicMock(spec=SupersetSecurityManager) + sm.get_current_guest_user_if_guest.return_value = None + datasource = _make_datasource(dataset_id=99) + + # Should not raise — allowlist block is not entered when there is no guest user. + SupersetSecurityManager.raise_for_access(sm, datasource=datasource) From 725f5ed2a90ea21cbdbbd1f11551685944aa181a Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:15 +0100 Subject: [PATCH 04/13] fix(api): enforce per-object ownership validation in chart, dataset, and report commands (#39303) Co-authored-by: Claude Sonnet 4.6 --- superset/charts/api.py | 9 +- superset/commands/chart/create.py | 5 + superset/commands/chart/fave.py | 8 +- superset/commands/chart/unfave.py | 8 +- superset/commands/chart/update.py | 6 +- superset/commands/chart/warm_up_cache.py | 19 ++- superset/commands/dashboard/delete.py | 5 +- superset/commands/dataset/create.py | 8 + superset/commands/dataset/duplicate.py | 8 +- superset/commands/dataset/warm_up_cache.py | 12 +- superset/commands/report/base.py | 46 +++++- superset/dashboards/api.py | 5 +- superset/views/base_api.py | 14 +- superset/views/datasource/utils.py | 17 +- superset/views/datasource/views.py | 24 +++ superset/views/sql_lab/views.py | 53 +++++-- superset/views/users/api.py | 4 + superset/views/utils.py | 4 +- .../charts/commands_tests.py | 23 ++- .../datasets/commands_tests.py | 20 +-- .../commands/chart/test_access_checks.py | 147 ++++++++++++++++++ .../commands/chart/warm_up_cache_test.py | 6 + .../commands/dataset/test_create.py | 9 +- .../commands/dataset/test_duplicate.py | 103 ++++++++++++ .../commands/dataset/test_warm_up_cache.py | 115 ++++++++++++++ .../commands/report/test_create_recipients.py | 14 +- 26 files changed, 620 insertions(+), 72 deletions(-) create mode 100644 tests/unit_tests/commands/chart/test_access_checks.py create mode 100644 tests/unit_tests/commands/dataset/test_duplicate.py create mode 100644 tests/unit_tests/commands/dataset/test_warm_up_cache.py diff --git a/superset/charts/api.py b/superset/charts/api.py index 617129d49c6d..9b692c02d9d7 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -58,6 +58,7 @@ from superset.commands.chart.create import CreateChartCommand from superset.commands.chart.delete import DeleteChartCommand from superset.commands.chart.exceptions import ( + ChartAccessDeniedError, ChartCreateFailedError, ChartDeleteFailedError, ChartForbiddenError, @@ -972,7 +973,7 @@ def add_favorite(self, pk: int) -> Response: AddFavoriteChartCommand(pk).run() except ChartNotFoundError: return self.response_404() - except ChartForbiddenError: + except (ChartAccessDeniedError, ChartForbiddenError): return self.response_403() return self.response(200, result="OK") @@ -1017,9 +1018,9 @@ def remove_favorite(self, pk: int) -> Response: try: DelFavoriteChartCommand(pk).run() except ChartNotFoundError: - self.response_404() - except ChartForbiddenError: - self.response_403() + return self.response_404() + except (ChartAccessDeniedError, ChartForbiddenError): + return self.response_403() return self.response(200, result="OK") diff --git a/superset/commands/chart/create.py b/superset/commands/chart/create.py index e3d3ea93f330..5742fe8d386a 100644 --- a/superset/commands/chart/create.py +++ b/superset/commands/chart/create.py @@ -27,6 +27,7 @@ from superset.commands.base import BaseCommand, CreateMixin from superset.commands.chart.exceptions import ( ChartCreateFailedError, + ChartForbiddenError, ChartInvalidError, DashboardsForbiddenError, DashboardsNotFoundValidationError, @@ -34,6 +35,7 @@ from superset.commands.utils import get_datasource_by_id from superset.daos.chart import ChartDAO from superset.daos.dashboard import DashboardDAO +from superset.exceptions import SupersetSecurityException from superset.utils import json from superset.utils.decorators import on_error, transaction @@ -69,6 +71,9 @@ def validate(self) -> None: try: datasource = get_datasource_by_id(datasource_id, datasource_type) self._properties["datasource_name"] = datasource.name + security_manager.raise_for_access(datasource=datasource) + except SupersetSecurityException as ex: + raise ChartForbiddenError() from ex except ValidationError as ex: exceptions.append(ex) diff --git a/superset/commands/chart/fave.py b/superset/commands/chart/fave.py index f1bf190bc4fe..c21f1c72c287 100644 --- a/superset/commands/chart/fave.py +++ b/superset/commands/chart/fave.py @@ -17,12 +17,15 @@ import logging from functools import partial +from superset import security_manager from superset.commands.base import BaseCommand from superset.commands.chart.exceptions import ( + ChartAccessDeniedError, ChartFaveError, ChartNotFoundError, ) from superset.daos.chart import ChartDAO +from superset.exceptions import SupersetSecurityException from superset.models.slice import Slice from superset.utils.decorators import on_error, transaction @@ -44,5 +47,8 @@ def validate(self) -> None: chart = ChartDAO.find_by_id(self._chart_id) if not chart: raise ChartNotFoundError() - + try: + security_manager.raise_for_access(chart=chart) + except SupersetSecurityException as ex: + raise ChartAccessDeniedError() from ex self._chart = chart diff --git a/superset/commands/chart/unfave.py b/superset/commands/chart/unfave.py index 0346b433e382..29530ef53b1d 100644 --- a/superset/commands/chart/unfave.py +++ b/superset/commands/chart/unfave.py @@ -17,12 +17,15 @@ import logging from functools import partial +from superset import security_manager from superset.commands.base import BaseCommand from superset.commands.chart.exceptions import ( + ChartAccessDeniedError, ChartNotFoundError, ChartUnfaveError, ) from superset.daos.chart import ChartDAO +from superset.exceptions import SupersetSecurityException from superset.models.slice import Slice from superset.utils.decorators import on_error, transaction @@ -44,5 +47,8 @@ def validate(self) -> None: chart = ChartDAO.find_by_id(self._chart_id) if not chart: raise ChartNotFoundError() - + try: + security_manager.raise_for_access(chart=chart) + except SupersetSecurityException as ex: + raise ChartAccessDeniedError() from ex self._chart = chart diff --git a/superset/commands/chart/update.py b/superset/commands/chart/update.py index 25ca2539a1a3..1af742b37f3c 100644 --- a/superset/commands/chart/update.py +++ b/superset/commands/chart/update.py @@ -73,7 +73,7 @@ def run(self) -> Model: return ChartDAO.update(self._model, self._properties) def _validate_new_dashboard_access( - self, requested_dashboards: list[Dashboard], exceptions: list[Exception] + self, requested_dashboards: list[Dashboard], exceptions: list[ValidationError] ) -> None: """ Validate user has access to any NEW dashboard relationships. @@ -102,6 +102,7 @@ def validate(self) -> None: # noqa: C901 # Validate if datasource_id is provided datasource_type is required datasource_id = self._properties.get("datasource_id") + datasource_type = "" if datasource_id is not None: datasource_type = self._properties.get("datasource_type", "") if not datasource_type: @@ -138,6 +139,9 @@ def validate(self) -> None: # noqa: C901 try: datasource = get_datasource_by_id(datasource_id, datasource_type) self._properties["datasource_name"] = datasource.name + security_manager.raise_for_access(datasource=datasource) + except SupersetSecurityException as ex: + raise ChartForbiddenError() from ex except ValidationError as ex: exceptions.append(ex) diff --git a/superset/commands/chart/warm_up_cache.py b/superset/commands/chart/warm_up_cache.py index d6ca39d7f6b1..f36e1c16c718 100644 --- a/superset/commands/chart/warm_up_cache.py +++ b/superset/commands/chart/warm_up_cache.py @@ -23,11 +23,13 @@ from superset.commands.base import BaseCommand from superset.commands.chart.data.get_data_command import ChartDataCommand from superset.commands.chart.exceptions import ( + ChartAccessDeniedError, ChartInvalidError, WarmUpCacheChartNotFoundError, ) from superset.common.db_query_status import QueryStatus -from superset.extensions import db +from superset.exceptions import SupersetSecurityException +from superset.extensions import db, security_manager from superset.models.slice import Slice from superset.utils import json from superset.utils.core import error_msg_from_exception, QueryObjectFilterClause @@ -124,8 +126,13 @@ def run(self) -> dict[str, Any]: def validate(self) -> None: if isinstance(self._chart_or_id, Slice): - return - chart = db.session.query(Slice).filter_by(id=self._chart_or_id).scalar() - if not chart: - raise WarmUpCacheChartNotFoundError() - self._chart_or_id = chart + chart = self._chart_or_id + else: + chart = db.session.query(Slice).filter_by(id=self._chart_or_id).scalar() + if not chart: + raise WarmUpCacheChartNotFoundError() + self._chart_or_id = chart + try: + security_manager.raise_for_access(chart=chart) + except SupersetSecurityException as ex: + raise ChartAccessDeniedError() from ex diff --git a/superset/commands/dashboard/delete.py b/superset/commands/dashboard/delete.py index fd53519a5083..a4d123fc6596 100644 --- a/superset/commands/dashboard/delete.py +++ b/superset/commands/dashboard/delete.py @@ -48,7 +48,10 @@ def run(self) -> None: return EmbeddedDashboardDAO.delete(self._dashboard.embedded) def validate(self) -> None: - pass + try: + security_manager.raise_for_ownership(self._dashboard) + except SupersetSecurityException as ex: + raise DashboardForbiddenError() from ex class DeleteDashboardCommand(BaseCommand): diff --git a/superset/commands/dataset/create.py b/superset/commands/dataset/create.py index d7eab2b7bb39..bc19a4f04f1b 100644 --- a/superset/commands/dataset/create.py +++ b/superset/commands/dataset/create.py @@ -102,6 +102,14 @@ def validate(self) -> None: # noqa: C901 field_name="sql", ) ) + elif database: + try: + security_manager.raise_for_access( + database=database, + table=table, + ) + except SupersetSecurityException as ex: + exceptions.append(DatasetDataAccessIsNotAllowed(ex.error.message)) try: owners = self.populate_owners(owner_ids) self._properties["owners"] = owners diff --git a/superset/commands/dataset/duplicate.py b/superset/commands/dataset/duplicate.py index 021389162381..2be7be5690b9 100644 --- a/superset/commands/dataset/duplicate.py +++ b/superset/commands/dataset/duplicate.py @@ -22,8 +22,10 @@ from flask_babel import gettext as __ from marshmallow import ValidationError +from superset import security_manager from superset.commands.base import BaseCommand, CreateMixin from superset.commands.dataset.exceptions import ( + DatasetAccessDeniedError, DatasetDuplicateFailedError, DatasetExistsValidationError, DatasetInvalidError, @@ -33,7 +35,7 @@ from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn from superset.daos.dataset import DatasetDAO from superset.errors import ErrorLevel, SupersetError, SupersetErrorType -from superset.exceptions import SupersetErrorException +from superset.exceptions import SupersetErrorException, SupersetSecurityException from superset.extensions import db from superset.models.core import Database from superset.sql.parse import Table @@ -110,6 +112,10 @@ def validate(self) -> None: if not base_model: exceptions.append(DatasetNotFoundError()) else: + try: + security_manager.raise_for_access(datasource=base_model) + except SupersetSecurityException as ex: + raise DatasetAccessDeniedError() from ex self._base_model = base_model if self._base_model and self._base_model.kind != "virtual": diff --git a/superset/commands/dataset/warm_up_cache.py b/superset/commands/dataset/warm_up_cache.py index 97b00c4772ad..bf01f9521c2f 100644 --- a/superset/commands/dataset/warm_up_cache.py +++ b/superset/commands/dataset/warm_up_cache.py @@ -20,9 +20,13 @@ from superset.commands.base import BaseCommand from superset.commands.chart.warm_up_cache import ChartWarmUpCacheCommand -from superset.commands.dataset.exceptions import WarmUpCacheTableNotFoundError +from superset.commands.dataset.exceptions import ( + DatasetAccessDeniedError, + WarmUpCacheTableNotFoundError, +) from superset.connectors.sqla.models import SqlaTable -from superset.extensions import db +from superset.exceptions import SupersetSecurityException +from superset.extensions import db, security_manager from superset.models.core import Database from superset.models.slice import Slice @@ -63,6 +67,10 @@ def validate(self) -> None: ).one_or_none() if not table: raise WarmUpCacheTableNotFoundError() + try: + security_manager.raise_for_access(datasource=table) + except SupersetSecurityException as ex: + raise DatasetAccessDeniedError() from ex self._charts = ( db.session.query(Slice) .filter_by(datasource_id=table.id, datasource_type=table.type) diff --git a/superset/commands/report/base.py b/superset/commands/report/base.py index 268016a54c01..353d9fea1398 100644 --- a/superset/commands/report/base.py +++ b/superset/commands/report/base.py @@ -22,6 +22,7 @@ from flask_babel import gettext as _ from marshmallow import ValidationError +from superset import security_manager from superset.commands.base import BaseCommand from superset.commands.report.exceptions import ( ChartNotFoundValidationError, @@ -29,11 +30,14 @@ DashboardNotFoundValidationError, DashboardNotSavedValidationError, ReportScheduleEitherChartOrDashboardError, + ReportScheduleForbiddenError, ReportScheduleFrequencyNotAllowed, ReportScheduleOnlyChartOrDashboardError, ) +from superset.daos.base import BaseDAO from superset.daos.chart import ChartDAO from superset.daos.dashboard import DashboardDAO +from superset.exceptions import SupersetSecurityException from superset.reports.models import ( ReportCreationMethod, ReportScheduleType, @@ -53,6 +57,26 @@ def run(self) -> Any: def validate(self) -> None: pass + def _check_object_access( + self, + object_id: int, + *, + kind: str, + dao: type[BaseDAO[Any]], + not_found_exc: type[ValidationError], + exceptions: list[ValidationError], + ) -> None: + """Validate the object exists and the current user can access it.""" + obj = dao.find_by_id(object_id) + if not obj: + exceptions.append(not_found_exc()) + else: + try: + security_manager.raise_for_access(**{kind: obj}) + except SupersetSecurityException as ex: + raise ReportScheduleForbiddenError() from ex + self._properties[kind] = obj + def validate_chart_dashboard( self, exceptions: list[ValidationError], update: bool = False ) -> None: @@ -74,15 +98,21 @@ def validate_chart_dashboard( exceptions.append(ReportScheduleOnlyChartOrDashboardError()) if chart_id: - chart = ChartDAO.find_by_id(chart_id) - if not chart: - exceptions.append(ChartNotFoundValidationError()) - self._properties["chart"] = chart + self._check_object_access( + chart_id, + kind="chart", + dao=ChartDAO, + not_found_exc=ChartNotFoundValidationError, + exceptions=exceptions, + ) elif dashboard_id: - dashboard = DashboardDAO.find_by_id(dashboard_id) - if not dashboard: - exceptions.append(DashboardNotFoundValidationError()) - self._properties["dashboard"] = dashboard + self._check_object_access( + dashboard_id, + kind="dashboard", + dao=DashboardDAO, + not_found_exc=DashboardNotFoundValidationError, + exceptions=exceptions, + ) elif not update: exceptions.append(ReportScheduleEitherChartOrDashboardError()) diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index 3deb304c0090..5220da680a0f 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -2131,7 +2131,10 @@ def delete_embedded(self, dashboard: Dashboard) -> Response: 500: $ref: '#/components/responses/500' """ - DeleteEmbeddedDashboardCommand(dashboard).run() + try: + DeleteEmbeddedDashboardCommand(dashboard).run() + except DashboardForbiddenError: + return self.response_403() return self.response(200, message="OK") @expose("//copy/", methods=("POST",)) diff --git a/superset/views/base_api.py b/superset/views/base_api.py index d800ea48dba4..a71a0bd9c75a 100644 --- a/superset/views/base_api.py +++ b/superset/views/base_api.py @@ -18,7 +18,7 @@ import functools import logging -from typing import Any, Callable, cast +from typing import Any, Callable, cast, Optional from flask import request, Response from flask_appbuilder import Model, ModelRestApi @@ -566,6 +566,14 @@ def delete_headless(self, pk: int) -> Response: self.send_stats_metrics(response, self.delete.__name__, duration) return response + def ensure_owners_write_access(self, column_name: str) -> Optional[Response]: + """Restrict the owners related field to users with write access.""" + if column_name == "owners" and not security_manager.can_access( + "can_write", self.class_permission_name + ): + return self.response_403() + return None + @expose("/related/", methods=("GET",)) @protect() @safe @@ -600,11 +608,15 @@ def related(self, column_name: str, **kwargs: Any) -> FlaskResponse: $ref: '#/components/responses/400' 401: $ref: '#/components/responses/401' + 403: + $ref: '#/components/responses/403' 404: $ref: '#/components/responses/404' 500: $ref: '#/components/responses/500' """ + if response := self.ensure_owners_write_access(column_name): + return response if column_name not in self.allowed_rel_fields: self.incr_stats("error", self.related.__name__) return self.response_404() diff --git a/superset/views/datasource/utils.py b/superset/views/datasource/utils.py index deec4067a56a..df974e696e45 100644 --- a/superset/views/datasource/utils.py +++ b/superset/views/datasource/utils.py @@ -14,8 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import logging -from typing import Any, Iterable, Optional +from typing import Any, Iterable, Optional, TYPE_CHECKING from flask import current_app as app @@ -28,6 +30,9 @@ from superset.utils.core import QueryStatus from superset.views.datasource.schemas import SamplesPayloadSchema +if TYPE_CHECKING: + from superset.daos.datasource import Datasource + logger = logging.getLogger(__name__) @@ -95,12 +100,14 @@ def get_samples( # pylint: disable=too-many-arguments page: int = 1, per_page: int = 1000, payload: SamplesPayloadSchema | None = None, + datasource: Datasource | None = None, dashboard_id: int | None = None, ) -> dict[str, Any]: - datasource = DatasourceDAO.get_datasource( - datasource_type=datasource_type, - database_id_or_uuid=str(datasource_id), - ) + if datasource is None: + datasource = DatasourceDAO.get_datasource( + datasource_type=datasource_type, + database_id_or_uuid=str(datasource_id), + ) form_data = {"dashboardId": dashboard_id} if dashboard_id else None limit_clause = get_limit_clause(page, per_page) diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py index b1b44d627767..9a420bb10b1b 100644 --- a/superset/views/datasource/views.py +++ b/superset/views/datasource/views.py @@ -36,6 +36,7 @@ from superset.daos.dashboard import DashboardDAO from superset.daos.dataset import DatasetDAO from superset.daos.datasource import DatasourceDAO +from superset.daos.exceptions import DatasourceNotFound, DatasourceTypeNotSupportedError from superset.exceptions import SupersetException, SupersetSecurityException from superset.models.core import Database from superset.sql.parse import Table @@ -223,6 +224,28 @@ def samples(self) -> FlaskResponse: dashboard, ): return json_error_response(_("Forbidden"), status=403) + else: + # Pre-fetch and access-check only for table-type datasources. + # Non-table types (query, saved_query) use a different access model; + # passing them to raise_for_access(datasource=...) would check the + # wrong attributes. Let get_samples() handle the lookup for those types. + if params["datasource_type"] in { + DatasourceType.TABLE.value, + DatasourceType.DATASET.value, + }: + try: + dataset = DatasourceDAO.get_datasource( + datasource_type=params["datasource_type"], + database_id_or_uuid=params["datasource_id"], + ) + except (DatasourceNotFound, DatasourceTypeNotSupportedError): + return self.response_404() + try: + security_manager.raise_for_access(datasource=dataset) + except SupersetSecurityException: + return json_error_response(_("Forbidden"), status=403) + else: + dataset = None rv = get_samples( datasource_type=params["datasource_type"], @@ -231,6 +254,7 @@ def samples(self) -> FlaskResponse: page=params["page"], per_page=params["per_page"], payload=payload, + datasource=dataset, dashboard_id=dashboard_id, ) return self.json_response({"result": rv}) diff --git a/superset/views/sql_lab/views.py b/superset/views/sql_lab/views.py index 4ac9b51fcc46..c927815c5f08 100644 --- a/superset/views/sql_lab/views.py +++ b/superset/views/sql_lab/views.py @@ -226,10 +226,14 @@ class TableSchemaView(BaseSupersetView): def post(self) -> FlaskResponse: try: table = json.loads(request.form["table"]) + tab_state_id = table["queryEditorId"] + owner_id = _get_owner_id(tab_state_id) + if owner_id is None or owner_id != get_user_id(): + return json_error_response(__("Forbidden"), status=403) # delete any existing table schema db.session.query(TableSchema).filter( - TableSchema.tab_state_id == table["queryEditorId"], + TableSchema.tab_state_id == tab_state_id, TableSchema.database_id == table["dbId"], TableSchema.catalog == table.get("catalog"), TableSchema.schema == table["schema"], @@ -237,7 +241,7 @@ def post(self) -> FlaskResponse: ).delete(synchronize_session=False) table_schema = TableSchema( - tab_state_id=table["queryEditorId"], + tab_state_id=tab_state_id, database_id=table["dbId"], catalog=table.get("catalog"), schema=table["schema"], @@ -256,9 +260,19 @@ def post(self) -> FlaskResponse: @expose("/", methods=("DELETE",)) def delete(self, table_schema_id: int) -> FlaskResponse: try: - db.session.query(TableSchema).filter( - TableSchema.id == table_schema_id - ).delete(synchronize_session=False) + tab_state_id = ( + db.session.query(TableSchema.tab_state_id) + .filter_by(id=table_schema_id) + .scalar() + ) + if tab_state_id is None: + return json_error_response(__("Not found"), status=404) + owner_id = _get_owner_id(tab_state_id) + if owner_id is None or owner_id != get_user_id(): + return json_error_response(__("Forbidden"), status=403) + db.session.query(TableSchema).filter_by(id=table_schema_id).delete( + synchronize_session=False + ) db.session.commit() return json_success(json.dumps("OK")) except Exception as ex: # pylint: disable=broad-except @@ -268,11 +282,24 @@ def delete(self, table_schema_id: int) -> FlaskResponse: @has_access_api @expose("//expanded", methods=("POST",)) def expanded(self, table_schema_id: int) -> FlaskResponse: - payload = json.loads(request.form["expanded"]) - ( - db.session.query(TableSchema) - .filter_by(id=table_schema_id) - .update({"expanded": payload}) - ) - response = json.dumps({"id": table_schema_id, "expanded": payload}) - return json_success(response) + try: + tab_state_id = ( + db.session.query(TableSchema.tab_state_id) + .filter_by(id=table_schema_id) + .scalar() + ) + if tab_state_id is None: + return json_error_response(__("Not found"), status=404) + owner_id = _get_owner_id(tab_state_id) + if owner_id is None or owner_id != get_user_id(): + return json_error_response(__("Forbidden"), status=403) + payload = json.loads(request.form["expanded"]) + db.session.query(TableSchema).filter_by(id=table_schema_id).update( + {"expanded": payload} + ) + db.session.commit() + response = json.dumps({"id": table_schema_id, "expanded": payload}) + return json_success(response) + except Exception as ex: # pylint: disable=broad-except + db.session.rollback() + return json_error_response(error_msg_from_exception(ex), 400) diff --git a/superset/views/users/api.py b/superset/views/users/api.py index 5f75b836eba9..44c98b1e5cc3 100644 --- a/superset/views/users/api.py +++ b/superset/views/users/api.py @@ -169,9 +169,13 @@ class UserRestApi(BaseSupersetApi): resource_name = "user" openapi_spec_tag = "User" + # Enable browser login for all user endpoints to support avatar access and other + # user-related functionality that may be called from browser contexts + allow_browser_login = True openapi_spec_component_schemas = (UserResponseSchema,) @expose("//avatar.png", methods=("GET",)) + @protect() @safe def avatar(self, user_id: int) -> Response: """Get a redirect to the avatar's URL for the user with the given ID. diff --git a/superset/views/utils.py b/superset/views/utils.py index 8d8f11702851..b66c48af5186 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -243,10 +243,12 @@ def get_form_data( # or if form_data only contains slice_id and additional filters if slice_id and (use_slice_data or valid_slice_id): slc = db.session.query(Slice).filter_by(id=slice_id).one_or_none() - if slc: + if slc and security_manager.can_access_chart(slc): slice_form_data = slc.form_data.copy() slice_form_data.update(form_data) form_data = slice_form_data + else: + slc = None update_time_range(form_data) return form_data, slc diff --git a/tests/integration_tests/charts/commands_tests.py b/tests/integration_tests/charts/commands_tests.py index b5fa05b72646..fecfa64f9d23 100644 --- a/tests/integration_tests/charts/commands_tests.py +++ b/tests/integration_tests/charts/commands_tests.py @@ -676,13 +676,22 @@ def test_fave_unfave_chart_command_non_owner(self, mock_find_by_id): # Assert that the chart exists assert example_chart is not None - with override_user(security_manager.find_user("gamma")): - AddFavoriteChartCommand(example_chart.id).run() - ids = ChartDAO.favorited_ids([example_chart]) + # Grant gamma read access to the datasource so the access check passes. + # Faving requires datasource access but not ownership. + if example_chart.datasource: + self.grant_role_access_to_table(example_chart.datasource, "Gamma") - assert example_chart.id in ids + try: + with override_user(security_manager.find_user("gamma")): + AddFavoriteChartCommand(example_chart.id).run() + ids = ChartDAO.favorited_ids([example_chart]) - DelFavoriteChartCommand(example_chart.id).run() - ids = ChartDAO.favorited_ids([example_chart]) + assert example_chart.id in ids - assert example_chart.id not in ids + DelFavoriteChartCommand(example_chart.id).run() + ids = ChartDAO.favorited_ids([example_chart]) + + assert example_chart.id not in ids + finally: + if example_chart.datasource: + self.revoke_role_access_to_table("Gamma", example_chart.datasource) diff --git a/tests/integration_tests/datasets/commands_tests.py b/tests/integration_tests/datasets/commands_tests.py index 58f947034e71..5682452969a3 100644 --- a/tests/integration_tests/datasets/commands_tests.py +++ b/tests/integration_tests/datasets/commands_tests.py @@ -569,15 +569,14 @@ def test_database_not_found(self, mock_g): with self.assertRaises(DatasetInvalidError): # noqa: PT027 CreateDatasetCommand({"table_name": "table", "database": 9999}).run() - @patch("superset.commands.utils.g") @patch("superset.models.core.Database.get_table") - def test_get_table_from_database_error(self, get_table_mock, mock_g): + def test_get_table_from_database_error(self, get_table_mock): get_table_mock.side_effect = SQLAlchemyError - mock_g.user = security_manager.find_user("admin") - with self.assertRaises(DatasetInvalidError): # noqa: PT027 - CreateDatasetCommand( - {"table_name": "table", "database": get_example_database().id} - ).run() + with override_user(security_manager.find_user("admin")): + with self.assertRaises(DatasetInvalidError): # noqa: PT027 + CreateDatasetCommand( + {"table_name": "table", "database": get_example_database().id} + ).run() def test_create_dataset_command(self): examples_db = get_example_database() @@ -635,9 +634,10 @@ def test_warm_up_cache(self): ) .all() ) - results = DatasetWarmUpCacheCommand( - get_example_database().database_name, "birth_names", None, None - ).run() + with override_user(security_manager.find_user("admin")): + results = DatasetWarmUpCacheCommand( + get_example_database().database_name, "birth_names", None, None + ).run() assert len(results) == len(birth_charts) for chart_result in results: assert "chart_id" in chart_result diff --git a/tests/unit_tests/commands/chart/test_access_checks.py b/tests/unit_tests/commands/chart/test_access_checks.py new file mode 100644 index 000000000000..f8f2b2b0135d --- /dev/null +++ b/tests/unit_tests/commands/chart/test_access_checks.py @@ -0,0 +1,147 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for per-object datasource access checks in chart create/update.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from superset.commands.chart.exceptions import ChartForbiddenError +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetSecurityException + + +def _security_exception() -> SupersetSecurityException: + return SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR, + message="Access denied", + level=ErrorLevel.ERROR, + ) + ) + + +# --------------------------------------------------------------------------- +# CreateChartCommand +# --------------------------------------------------------------------------- + + +def test_create_chart_command_forbidden_when_no_datasource_access() -> None: + """CreateChartCommand.validate() must raise ChartForbiddenError when the + caller lacks access to the chart's datasource.""" + from superset.commands.chart.create import CreateChartCommand + + with patch( + "superset.commands.chart.create.get_datasource_by_id", + return_value=MagicMock(name="datasource"), + ): + with patch( + "superset.commands.chart.create.security_manager.raise_for_access", + side_effect=_security_exception(), + ): + with patch( + "superset.commands.chart.create.CreateChartCommand.populate_owners", + return_value=[], + ): + command = CreateChartCommand( + { + "slice_name": "test", + "viz_type": "bar", + "datasource_id": 1, + "datasource_type": "table", + } + ) + with pytest.raises(ChartForbiddenError): + command.validate() + + +def test_create_chart_command_allowed_when_access_passes() -> None: + """CreateChartCommand.validate() must not raise when the caller has access.""" + from superset.commands.chart.create import CreateChartCommand + + mock_datasource = MagicMock() + mock_datasource.name = "test_table" + + with patch( + "superset.commands.chart.create.get_datasource_by_id", + return_value=mock_datasource, + ): + with patch("superset.commands.chart.create.security_manager.raise_for_access"): + with patch( + "superset.commands.chart.create.CreateChartCommand.populate_owners", + return_value=[], + ): + with patch( + "superset.commands.chart.create.DashboardDAO.find_by_ids", + return_value=[], + ): + command = CreateChartCommand( + { + "slice_name": "test", + "viz_type": "bar", + "datasource_id": 1, + "datasource_type": "table", + } + ) + command.validate() # should not raise + + +# --------------------------------------------------------------------------- +# UpdateChartCommand +# --------------------------------------------------------------------------- + + +def test_update_chart_command_forbidden_when_no_datasource_access() -> None: + """UpdateChartCommand.validate() must raise ChartForbiddenError when the + caller lacks access to the new datasource.""" + from superset.commands.chart.update import UpdateChartCommand + + mock_chart = MagicMock() + mock_chart.id = 1 + mock_chart.owners = [] + mock_chart.dashboards = [] + mock_chart.tags = [] + + with patch( + "superset.commands.chart.update.ChartDAO.find_by_id", + return_value=mock_chart, + ): + with patch( + "superset.commands.chart.update.security_manager.raise_for_ownership" + ): + with patch( + "superset.commands.chart.update.UpdateChartCommand.compute_owners", + return_value=[], + ): + with patch("superset.commands.chart.update.validate_tags"): + with patch( + "superset.commands.chart.update.get_datasource_by_id", + return_value=MagicMock(name="datasource"), + ): + with patch( + "superset.commands.chart.update.security_manager.raise_for_access", + side_effect=_security_exception(), + ): + command = UpdateChartCommand( + 1, + { + "datasource_id": 2, + "datasource_type": "table", + }, + ) + with pytest.raises(ChartForbiddenError): + command.validate() diff --git a/tests/unit_tests/commands/chart/warm_up_cache_test.py b/tests/unit_tests/commands/chart/warm_up_cache_test.py index 7eaefafc08a2..f178ab85e753 100644 --- a/tests/unit_tests/commands/chart/warm_up_cache_test.py +++ b/tests/unit_tests/commands/chart/warm_up_cache_test.py @@ -22,6 +22,12 @@ from superset.models.slice import Slice +@pytest.fixture(autouse=True) +def mock_security_manager(): + with patch("superset.commands.chart.warm_up_cache.security_manager"): + yield + + @patch("superset.commands.chart.warm_up_cache.get_dashboard_extra_filters") @patch("superset.commands.chart.warm_up_cache.ChartDataCommand") def test_applies_dashboard_filters_to_non_legacy_chart( diff --git a/tests/unit_tests/commands/dataset/test_create.py b/tests/unit_tests/commands/dataset/test_create.py index 4202cb3ff2ff..c84b1664c770 100644 --- a/tests/unit_tests/commands/dataset/test_create.py +++ b/tests/unit_tests/commands/dataset/test_create.py @@ -26,7 +26,7 @@ from superset.models.core import Database -def test_create_dataset_invalid_sql_parse_error(): +def test_create_dataset_invalid_sql_parse_error() -> None: """Test that invalid SQL returns a 4xx error when caught as SupersetParseError.""" mock_database = Mock(spec=Database) mock_database.id = 1 @@ -75,7 +75,7 @@ def test_create_dataset_invalid_sql_parse_error(): ) -def test_create_dataset_valid_sql_with_access_error(): +def test_create_dataset_valid_sql_with_access_error() -> None: """ Test that security exceptions work correctly """ @@ -136,7 +136,10 @@ def test_create_dataset_valid_sql_with_access_error(): ) -def test_create_dataset_physical_table_no_parse_error(): +@patch("superset.commands.dataset.create.security_manager") +def test_create_dataset_physical_table_no_parse_error( + mock_security_manager: Mock, +) -> None: """Test that physical tables (no SQL) don't trigger parsing.""" mock_database = Mock(spec=Database) mock_database.id = 1 diff --git a/tests/unit_tests/commands/dataset/test_duplicate.py b/tests/unit_tests/commands/dataset/test_duplicate.py new file mode 100644 index 000000000000..28b047e3e687 --- /dev/null +++ b/tests/unit_tests/commands/dataset/test_duplicate.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for per-object access check in DuplicateDatasetCommand.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from superset.commands.dataset.exceptions import DatasetAccessDeniedError +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetSecurityException + + +def _security_exception() -> SupersetSecurityException: + return SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR, + message="Access denied to dataset", + level=ErrorLevel.ERROR, + ) + ) + + +def test_duplicate_dataset_forbidden_when_no_access() -> None: + """DuplicateDatasetCommand.validate() must raise DatasetAccessDeniedError + when the caller lacks read access to the source dataset.""" + from superset.commands.dataset.duplicate import DuplicateDatasetCommand + + mock_dataset = MagicMock() + mock_dataset.id = 1 + mock_dataset.kind = "virtual" + + with patch( + "superset.commands.dataset.duplicate.DatasetDAO.find_by_id", + return_value=mock_dataset, + ): + with patch( + "superset.commands.dataset.duplicate.security_manager.raise_for_access", + side_effect=_security_exception(), + ): + with patch( + "superset.commands.dataset.duplicate.DuplicateDatasetCommand.populate_owners", + return_value=[], + ): + command = DuplicateDatasetCommand( + { + "base_model_id": 1, + "table_name": "duplicate_name", + "is_managed_externally": False, + } + ) + with pytest.raises(DatasetAccessDeniedError): + command.validate() + + +def test_duplicate_dataset_access_check_passes_through() -> None: + """DuplicateDatasetCommand.validate() must not raise DatasetAccessDeniedError + when security_manager.raise_for_access() does not raise.""" + from superset.commands.dataset.duplicate import DuplicateDatasetCommand + + mock_dataset = MagicMock() + mock_dataset.id = 1 + mock_dataset.kind = "virtual" + + with patch( + "superset.commands.dataset.duplicate.DatasetDAO.find_by_id", + return_value=mock_dataset, + ): + with patch( + "superset.commands.dataset.duplicate.security_manager.raise_for_access" + ) as mock_access: + with patch( + "superset.commands.dataset.duplicate.DatasetDAO.find_one_or_none", + return_value=None, + ): + with patch( + "superset.commands.dataset.duplicate.DuplicateDatasetCommand.populate_owners", + return_value=[], + ): + command = DuplicateDatasetCommand( + { + "base_model_id": 1, + "table_name": "new_unique_name", + "is_managed_externally": False, + } + ) + command.validate() # should not raise + # Confirm access check was called with the base dataset + mock_access.assert_called_once_with(datasource=mock_dataset) diff --git a/tests/unit_tests/commands/dataset/test_warm_up_cache.py b/tests/unit_tests/commands/dataset/test_warm_up_cache.py new file mode 100644 index 000000000000..89882180093b --- /dev/null +++ b/tests/unit_tests/commands/dataset/test_warm_up_cache.py @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for DatasetWarmUpCacheCommand access control.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from superset.commands.dataset.exceptions import ( + DatasetAccessDeniedError, + WarmUpCacheTableNotFoundError, +) +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetSecurityException + + +def _security_exception() -> SupersetSecurityException: + return SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR, + message="Access denied to table", + level=ErrorLevel.ERROR, + ) + ) + + +def _mock_table() -> MagicMock: + table = MagicMock() + table.id = 1 + table.type = "table" + return table + + +def test_warm_up_cache_raises_not_found_when_table_missing() -> None: + """validate() must raise WarmUpCacheTableNotFoundError when the table + does not exist in the given database.""" + from superset.commands.dataset.warm_up_cache import DatasetWarmUpCacheCommand + + with patch("superset.commands.dataset.warm_up_cache.db") as mock_db: + q = mock_db.session.query.return_value + q.join.return_value.filter.return_value.one_or_none.return_value = None + + command = DatasetWarmUpCacheCommand( + db_name="mydb", + table_name="nonexistent", + dashboard_id=None, + extra_filters=None, + ) + with pytest.raises(WarmUpCacheTableNotFoundError): + command.validate() + + +def test_warm_up_cache_raises_access_denied_when_no_permission() -> None: + """validate() must raise DatasetAccessDeniedError when the caller lacks + access to the dataset.""" + from superset.commands.dataset.warm_up_cache import DatasetWarmUpCacheCommand + + mock_table = _mock_table() + + with patch("superset.commands.dataset.warm_up_cache.db") as mock_db: + q = mock_db.session.query.return_value + q.join.return_value.filter.return_value.one_or_none.return_value = mock_table + + with patch( + "superset.commands.dataset.warm_up_cache.security_manager.raise_for_access", + side_effect=_security_exception(), + ): + command = DatasetWarmUpCacheCommand( + db_name="mydb", + table_name="secret_table", + dashboard_id=None, + extra_filters=None, + ) + with pytest.raises(DatasetAccessDeniedError): + command.validate() + + +def test_warm_up_cache_populates_charts_when_access_granted() -> None: + """validate() must populate _charts when the caller has access.""" + from superset.commands.dataset.warm_up_cache import DatasetWarmUpCacheCommand + + mock_table = _mock_table() + mock_charts = [MagicMock(), MagicMock()] + + with patch("superset.commands.dataset.warm_up_cache.db") as mock_db: + # First query() call returns table; second returns chart list + q = mock_db.session.query.return_value + q.join.return_value.filter.return_value.one_or_none.return_value = mock_table + q.filter_by.return_value.all.return_value = mock_charts + + with patch( + "superset.commands.dataset.warm_up_cache.security_manager.raise_for_access" + ): + command = DatasetWarmUpCacheCommand( + db_name="mydb", + table_name="allowed_table", + dashboard_id=None, + extra_filters=None, + ) + command.validate() + assert command._charts == mock_charts diff --git a/tests/unit_tests/commands/report/test_create_recipients.py b/tests/unit_tests/commands/report/test_create_recipients.py index 3956302e0452..d583796d12a9 100644 --- a/tests/unit_tests/commands/report/test_create_recipients.py +++ b/tests/unit_tests/commands/report/test_create_recipients.py @@ -17,6 +17,8 @@ from unittest.mock import MagicMock, patch +from marshmallow import ValidationError + from superset.commands.report.create import CreateReportScheduleCommand from superset.commands.report.exceptions import ReportScheduleUserEmailNotFoundError from superset.reports.models import ( @@ -45,7 +47,7 @@ def test_populate_recipients_chart_creation_with_user_email() -> None: ], } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that recipients were overridden @@ -73,7 +75,7 @@ def test_populate_recipients_dashboard_creation_with_user_email() -> None: # No recipients provided initially } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that recipients were set @@ -104,7 +106,7 @@ def test_populate_recipients_alerts_reports_keeps_original() -> None: "recipients": original_recipients, } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that recipients were NOT changed @@ -125,7 +127,7 @@ def test_populate_recipients_chart_creation_no_user_email() -> None: "creation_method": ReportCreationMethod.CHARTS, } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that validation error was added @@ -149,7 +151,7 @@ def test_populate_recipients_dashboard_creation_no_user() -> None: "creation_method": ReportCreationMethod.DASHBOARDS, } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that validation error was added @@ -171,7 +173,7 @@ def test_populate_recipients_no_creation_method() -> None: "recipients": original_recipients, } - exceptions: list[Exception] = [] + exceptions: list[ValidationError] = [] command._populate_recipients(exceptions) # Check that recipients were NOT changed From f7f50a797707075499ecbb2ce30416d19bfd7c0e Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:25 +0100 Subject: [PATCH 05/13] fix(sqllab): quote CTAS target identifiers and validate tmp_table_name format (#40245) Co-authored-by: Claude Sonnet 4.6 --- superset/sql/parse.py | 8 +- superset/sqllab/schemas.py | 14 +- superset/views/sql_lab/schemas.py | 7 +- tests/integration_tests/celery_tests.py | 138 ++++-------------- tests/unit_tests/sql/parse_tests.py | 4 +- tests/unit_tests/sql_lab_execution_context.py | 3 +- 6 files changed, 56 insertions(+), 118 deletions(-) diff --git a/superset/sql/parse.py b/superset/sql/parse.py index 154a8ae0b198..f2f6b805782a 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -923,9 +923,11 @@ def as_create_table(self, table: Table, method: CTASMethod) -> SQLStatement: :return: A new SQLStatement with the create table statement. """ table_expr = exp.Table( - this=exp.Identifier(this=table.table), - db=exp.Identifier(this=table.schema) if table.schema else None, - catalog=exp.Identifier(this=table.catalog) if table.catalog else None, + this=exp.Identifier(this=table.table, quoted=True), + db=exp.Identifier(this=table.schema, quoted=True) if table.schema else None, + catalog=exp.Identifier(this=table.catalog, quoted=True) + if table.catalog + else None, ) create_table = exp.Create( this=table_expr, diff --git a/superset/sqllab/schemas.py b/superset/sqllab/schemas.py index 3c2413f8d3ba..4adb07e810a7 100644 --- a/superset/sqllab/schemas.py +++ b/superset/sqllab/schemas.py @@ -14,10 +14,17 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from marshmallow import fields, Schema +from marshmallow import fields, Schema, validate from superset.databases.schemas import ImportV1DatabaseSchema +# Restricts the optional CTAS target name to a bare SQL identifier. Shared by the +# SQL Lab execute payload schemas so both request paths validate it identically. +tmp_table_name_validator = validate.Regexp( + r"^([A-Za-z_][A-Za-z0-9_]*)?\Z", + error="tmp_table_name must contain only letters, digits, and underscores", +) + sql_lab_get_results_schema = { "type": "object", "properties": { @@ -69,7 +76,10 @@ class ExecutePayloadSchema(Schema): tab = fields.String(allow_none=True) ctas_method = fields.String(allow_none=True) templateParams = fields.String(allow_none=True) # noqa: N815 - tmp_table_name = fields.String(allow_none=True) + tmp_table_name = fields.String( + allow_none=True, + validate=tmp_table_name_validator, + ) select_as_cta = fields.Boolean(allow_none=True) runAsync = fields.Boolean(allow_none=True) # noqa: N815 expand_data = fields.Boolean(allow_none=True) diff --git a/superset/views/sql_lab/schemas.py b/superset/views/sql_lab/schemas.py index c9f57d7c023f..99f6fed7b89e 100644 --- a/superset/views/sql_lab/schemas.py +++ b/superset/views/sql_lab/schemas.py @@ -17,6 +17,8 @@ from marshmallow import fields, Schema +from superset.sqllab.schemas import tmp_table_name_validator + class SqlJsonPayloadSchema(Schema): database_id = fields.Integer(required=True) @@ -28,7 +30,10 @@ class SqlJsonPayloadSchema(Schema): tab = fields.String(allow_none=True) ctas_method = fields.String(allow_none=True) templateParams = fields.String(allow_none=True) # noqa: N815 - tmp_table_name = fields.String(allow_none=True) + tmp_table_name = fields.String( + allow_none=True, + validate=tmp_table_name_validator, + ) select_as_cta = fields.Boolean(allow_none=True) runAsync = fields.Boolean(allow_none=True) # noqa: N815 expand_data = fields.Boolean(allow_none=True) diff --git a/tests/integration_tests/celery_tests.py b/tests/integration_tests/celery_tests.py index 8fc5fe293e02..11f9cdf0707a 100644 --- a/tests/integration_tests/celery_tests.py +++ b/tests/integration_tests/celery_tests.py @@ -130,6 +130,18 @@ def quote_f(value: Optional[str]): return inspector.engine.dialect.identifier_preparer.quote_identifier(value) +def expected_cta_sql( + ctas_method: CTASMethod, table: str, schema: Optional[str] = None +) -> str: + target = quote_f(table) + if schema: + target = f"{quote_f(schema)}.{target}" + return ( + f"CREATE {ctas_method.name} {target} AS\n" + "SELECT\n name\nFROM birth_names\nLIMIT 1" + ) + + def cta_result(ctas_method: CTASMethod): if backend() != "presto": return [], [] @@ -225,31 +237,7 @@ def test_run_sync_query_cta_no_data(test_client): @pytest.mark.usefixtures("load_birth_names_data", "login_as_admin") -@pytest.mark.parametrize( - "ctas_method, expected", - [ - ( - CTASMethod.TABLE, - """ -CREATE TABLE sqllab_test_db.test_sync_cta_table AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ( - CTASMethod.VIEW, - """ -CREATE VIEW sqllab_test_db.test_sync_cta_view AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ], -) +@pytest.mark.parametrize("ctas_method", [CTASMethod.TABLE, CTASMethod.VIEW]) @mock.patch( # noqa: PT008 "superset.sqllab.sqllab_execution_context.get_cta_schema_name", lambda d, u, s, sql: CTAS_SCHEMA_NAME, @@ -257,12 +245,13 @@ def test_run_sync_query_cta_no_data(test_client): def test_run_sync_query_cta_config( test_client, ctas_method: CTASMethod, - expected: str, ) -> None: - if backend() == "sqlite": + db_backend = backend() + if db_backend == "sqlite": # sqlite doesn't support schemas return tmp_table_name = f"{TEST_SYNC_CTA}_{ctas_method.name.lower()}" + expected = expected_cta_sql(ctas_method, tmp_table_name, CTAS_SCHEMA_NAME) result = run_sql( test_client, QUERY, cta=True, ctas_method=ctas_method, tmp_table=tmp_table_name ) @@ -281,31 +270,7 @@ def test_run_sync_query_cta_config( @pytest.mark.usefixtures("load_birth_names_data", "login_as_admin") -@pytest.mark.parametrize( - "ctas_method, expected", - [ - ( - CTASMethod.TABLE, - """ -CREATE TABLE sqllab_test_db.test_async_cta_config_table AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ( - CTASMethod.VIEW, - """ -CREATE VIEW sqllab_test_db.test_async_cta_config_view AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ], -) +@pytest.mark.parametrize("ctas_method", [CTASMethod.TABLE, CTASMethod.VIEW]) @mock.patch( # noqa: PT008 "superset.sqllab.sqllab_execution_context.get_cta_schema_name", lambda d, u, s, sql: CTAS_SCHEMA_NAME, @@ -313,12 +278,13 @@ def test_run_sync_query_cta_config( def test_run_async_query_cta_config( test_client, ctas_method: CTASMethod, - expected: str, ) -> None: - if backend() == "sqlite": + db_backend = backend() + if db_backend == "sqlite": # sqlite doesn't support schemas return tmp_table_name = f"{TEST_ASYNC_CTA_CONFIG}_{ctas_method.name.lower()}" + expected = expected_cta_sql(ctas_method, tmp_table_name, CTAS_SCHEMA_NAME) result = run_sql( test_client, QUERY, @@ -341,37 +307,14 @@ def test_run_async_query_cta_config( @pytest.mark.usefixtures("load_birth_names_data", "login_as_admin") -@pytest.mark.parametrize( - "ctas_method, expected", - [ - ( - CTASMethod.TABLE, - """ -CREATE TABLE test_async_cta_table AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ( - CTASMethod.VIEW, - """ -CREATE VIEW test_async_cta_view AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ], -) +@pytest.mark.parametrize("ctas_method", [CTASMethod.TABLE, CTASMethod.VIEW]) def test_run_async_cta_query( test_client, ctas_method: CTASMethod, - expected: str, ) -> None: + db_backend = backend() table_name = f"{TEST_ASYNC_CTA}_{ctas_method.name.lower()}" + expected = expected_cta_sql(ctas_method, table_name) result = run_sql( test_client, QUERY, @@ -388,7 +331,7 @@ def test_run_async_cta_query( assert query.executed_sql == expected assert QUERY == query.sql - assert query.rows == (1 if backend() == "presto" else 0) + assert query.rows == (1 if db_backend == "presto" else 0) assert query.select_as_cta assert query.select_as_cta_used @@ -396,37 +339,14 @@ def test_run_async_cta_query( @pytest.mark.usefixtures("load_birth_names_data", "login_as_admin") -@pytest.mark.parametrize( - "ctas_method, expected", - [ - ( - CTASMethod.TABLE, - """ -CREATE TABLE test_async_lower_limit_table AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ( - CTASMethod.VIEW, - """ -CREATE VIEW test_async_lower_limit_view AS -SELECT - name -FROM birth_names -LIMIT 1 - """.strip(), - ), - ], -) +@pytest.mark.parametrize("ctas_method", [CTASMethod.TABLE, CTASMethod.VIEW]) def test_run_async_cta_query_with_lower_limit( test_client, ctas_method: CTASMethod, - expected: str, ) -> None: + db_backend = backend() tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.name.lower()}" + expected = expected_cta_sql(ctas_method, tmp_table) result = run_sql( test_client, QUERY, @@ -441,14 +361,14 @@ def test_run_async_cta_query_with_lower_limit( sqlite_select_sql = f"SELECT\n *\nFROM {tmp_table}\nLIMIT {query.limit}\nOFFSET 0" assert query.select_sql == ( sqlite_select_sql - if backend() == "sqlite" + if db_backend == "sqlite" else get_select_star(tmp_table, query.limit) ) assert query.executed_sql == expected assert QUERY == query.sql - assert query.rows == (1 if backend() == "presto" else 0) + assert query.rows == (1 if db_backend == "presto" else 0) assert query.limit == 50000 assert query.select_as_cta assert query.select_as_cta_used diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py index 27c293229705..7fd7e6a6f3e8 100644 --- a/tests/unit_tests/sql/parse_tests.py +++ b/tests/unit_tests/sql/parse_tests.py @@ -2712,7 +2712,7 @@ def test_rls_predicate_transformer( "SELECT * FROM some_table", Table("some_table"), """ -CREATE TABLE some_table AS +CREATE TABLE "some_table" AS SELECT * FROM some_table @@ -2722,7 +2722,7 @@ def test_rls_predicate_transformer( "SELECT * FROM some_table", Table("some_table", "schema1", "catalog1"), """ -CREATE TABLE catalog1.schema1.some_table AS +CREATE TABLE "catalog1"."schema1"."some_table" AS SELECT * FROM some_table diff --git a/tests/unit_tests/sql_lab_execution_context.py b/tests/unit_tests/sql_lab_execution_context.py index 1591967de481..41374ec293b6 100644 --- a/tests/unit_tests/sql_lab_execution_context.py +++ b/tests/unit_tests/sql_lab_execution_context.py @@ -18,6 +18,7 @@ import pytest +from superset.sql.parse import CTASMethod from superset.sqllab.sqllab_execution_context import ( CreateTableAsSelect, SqlJsonExecutionContext, @@ -97,6 +98,6 @@ def test_create_table_as_select(): "tmp_table_name": "temp_table", } ctas = CreateTableAsSelect.create_from(query_params) - assert ctas.ctas_method == "TABLE" + assert ctas.ctas_method == CTASMethod.TABLE assert ctas.target_schema_name == "public" assert ctas.target_table_name == "temp_table" From cf08a5ebf75f9b2f7ddf24ac1591fc893aa685db Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:31 +0100 Subject: [PATCH 06/13] feat(docker): add environment-based debugger control (#40327) Signed-off-by: dependabot[bot] Co-authored-by: Claude Sonnet 4.6 Co-authored-by: Mehmet Salih Yavuz Co-authored-by: Beto Dealmeida Co-authored-by: Elizabeth Thompson Co-authored-by: Evan Rusackas Co-authored-by: Jay Masiwal Co-authored-by: JUST.in DO IT Co-authored-by: Copilot Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Amin Ghadersohi Co-authored-by: chaselynisabella --- docker/docker-bootstrap.sh | 18 +++++++++++++++++- docs/admin_docs/installation/pypi.mdx | 9 ++++++++- .../version-6.1.0/installation/pypi.mdx | 9 ++++++++- .../contributing/development-setup.md | 2 ++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/docker/docker-bootstrap.sh b/docker/docker-bootstrap.sh index d20f28007f1b..35871fc42086 100755 --- a/docker/docker-bootstrap.sh +++ b/docker/docker-bootstrap.sh @@ -80,7 +80,23 @@ case "${1}" in ;; app) echo "Starting web app (using development server)..." - flask run -p $PORT --reload --debugger --host=0.0.0.0 --exclude-patterns "*/node_modules/*:*/.venv/*:*/build/*:*/__pycache__/*:*/superset-frontend/*" + + # Environment-based debugger control for security + # Only enable Werkzeug interactive debugger when explicitly requested + # Modern Werkzeug (3.0+) includes PIN protection, but defense-in-depth approach + # Override FLASK_DEBUG so the effective state matches SUPERSET_DEBUG_ENABLED even + # when FLASK_DEBUG=true is inherited from docker/.env or .flaskenv + if [[ "${SUPERSET_DEBUG_ENABLED:-}" == "true" ]]; then + export FLASK_DEBUG=1 + DEBUGGER_FLAG="--debugger" + echo " ⚠️ Werkzeug debugger enabled (requires PIN for /console access)" + else + export FLASK_DEBUG=0 + DEBUGGER_FLAG="--no-debugger" + echo " 🔒 Werkzeug debugger disabled (set SUPERSET_DEBUG_ENABLED=true to enable)" + fi + + flask run -p $PORT --reload $DEBUGGER_FLAG --host=0.0.0.0 --exclude-patterns "*/node_modules/*:*/.venv/*:*/build/*:*/__pycache__/*:*/superset-frontend/*" ;; app-gunicorn) echo "Starting web app..." diff --git a/docs/admin_docs/installation/pypi.mdx b/docs/admin_docs/installation/pypi.mdx index 820ac776b952..7dd5b4b8e792 100644 --- a/docs/admin_docs/installation/pypi.mdx +++ b/docs/admin_docs/installation/pypi.mdx @@ -157,8 +157,15 @@ superset load_examples superset init # To start a development web server on port 8088, use -p to bind to another port -superset run -p 8088 --with-threads --reload --debugger +superset run -p 8088 --with-threads --reload + +# For debugging with interactive console (⚠️ localhost only) +# superset run -p 8088 --with-threads --reload --debugger ``` +:::warning Security Note +The `--debugger` flag enables Werkzeug's interactive console at `/console`. Only use this for local development and never bind to `0.0.0.0` or expose the server to networks when debugging is enabled. +::: + If everything worked, you should be able to navigate to `hostname:port` in your browser (e.g. locally by default at `localhost:8088`) and login using the username and password you created. diff --git a/docs/admin_docs_versioned_docs/version-6.1.0/installation/pypi.mdx b/docs/admin_docs_versioned_docs/version-6.1.0/installation/pypi.mdx index 820ac776b952..7dd5b4b8e792 100644 --- a/docs/admin_docs_versioned_docs/version-6.1.0/installation/pypi.mdx +++ b/docs/admin_docs_versioned_docs/version-6.1.0/installation/pypi.mdx @@ -157,8 +157,15 @@ superset load_examples superset init # To start a development web server on port 8088, use -p to bind to another port -superset run -p 8088 --with-threads --reload --debugger +superset run -p 8088 --with-threads --reload + +# For debugging with interactive console (⚠️ localhost only) +# superset run -p 8088 --with-threads --reload --debugger ``` +:::warning Security Note +The `--debugger` flag enables Werkzeug's interactive console at `/console`. Only use this for local development and never bind to `0.0.0.0` or expose the server to networks when debugging is enabled. +::: + If everything worked, you should be able to navigate to `hostname:port` in your browser (e.g. locally by default at `localhost:8088`) and login using the username and password you created. diff --git a/docs/developer_docs/contributing/development-setup.md b/docs/developer_docs/contributing/development-setup.md index 4760dd550d33..c0ac0d165bb2 100644 --- a/docs/developer_docs/contributing/development-setup.md +++ b/docs/developer_docs/contributing/development-setup.md @@ -102,6 +102,8 @@ Affecting the Docker build process: save some precious time on startup by `SUPERSET_LOAD_EXAMPLES=no docker compose up` - **SUPERSET_LOG_LEVEL (default=info)**: Can be set to debug, info, warning, error, critical for more verbose logging +- **SUPERSET_DEBUG_ENABLED (default=false)**: Enable Werkzeug debugger with interactive console. + Set to `true` for debugging: `SUPERSET_DEBUG_ENABLED=true docker compose up` For more env vars that affect your configuration, see this [superset_config.py](https://github.com/apache/superset/blob/master/docker/pythonpath_dev/superset_config.py) From 3191b0fdcd5ca8f859bc4fdbca1215fbe8c62a43 Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:38 +0100 Subject: [PATCH 07/13] fix: apply dashboard access check in related_objects endpoints (#40333) Co-authored-by: Claude Sonnet 4.6 --- superset/commands/tag/create.py | 35 ++++++++++++++++ superset/commands/tag/delete.py | 40 ++++++++++++++++++- superset/commands/tag/utils.py | 17 +++++--- superset/databases/api.py | 2 + superset/datasets/api.py | 2 + .../integration_tests/tags/commands_tests.py | 4 ++ 6 files changed, 93 insertions(+), 7 deletions(-) diff --git a/superset/commands/tag/create.py b/superset/commands/tag/create.py index b3788eb8702d..bb18253c625b 100644 --- a/superset/commands/tag/create.py +++ b/superset/commands/tag/create.py @@ -60,9 +60,44 @@ def validate(self) -> None: exceptions.append( TagCreateFailedError(f"invalid object type {self._object_type}") ) + + # Validate user has access to the target object + if object_type: + self._validate_object_access(object_type, self._object_id, exceptions) + if exceptions: raise TagInvalidError(exceptions=exceptions) + def _validate_object_access( + self, object_type: ObjectType, object_id: int, exceptions: list[Any] + ) -> None: + """Validate that the current user has access to the target object.""" + # Skip base filter so we can distinguish "not found" from "no access" + target_object = to_object_model(object_type, object_id, skip_base_filter=True) + if not target_object: + # Allow operation on stale references; no object to authorize against + return + + try: + if object_type == ObjectType.dashboard: + security_manager.raise_for_access(dashboard=target_object) + elif object_type == ObjectType.chart: + security_manager.raise_for_access(chart=target_object) + elif object_type == ObjectType.query: + security_manager.raise_for_access(query=target_object) + elif object_type == ObjectType.dataset: + security_manager.raise_for_access(datasource=target_object) + else: + exceptions.append( + TagCreateFailedError( + f"Access validation not supported for {object_type}" + ) + ) + except SupersetSecurityException: + exceptions.append( + TagCreateFailedError(f"Access denied for {object_type} {object_id}") + ) + class CreateCustomTagWithRelationshipsCommand(CreateMixin, BaseCommand): def __init__(self, data: dict[str, Any], bulk_create: bool = False): diff --git a/superset/commands/tag/delete.py b/superset/commands/tag/delete.py index 89a2a5a5568d..dfc5686d497d 100644 --- a/superset/commands/tag/delete.py +++ b/superset/commands/tag/delete.py @@ -16,7 +16,9 @@ # under the License. import logging from functools import partial +from typing import Any +from superset import security_manager from superset.commands.base import BaseCommand from superset.commands.tag.exceptions import ( TagDeleteFailedError, @@ -25,8 +27,9 @@ TagInvalidError, TagNotFoundError, ) -from superset.commands.tag.utils import to_object_type +from superset.commands.tag.utils import to_object_model, to_object_type from superset.daos.tag import TagDAO +from superset.exceptions import SupersetSecurityException from superset.tags.models import ObjectType from superset.utils.decorators import on_error, transaction from superset.views.base import DeleteMixin @@ -71,6 +74,9 @@ def validate(self) -> None: ) ) else: + # Validate user has access to the target object + self._validate_object_access(object_type, self._object_id, exceptions) + tagged_object = TagDAO.find_tagged_object( object_type=object_type, object_id=self._object_id, tag_id=tag.id ) @@ -85,6 +91,38 @@ def validate(self) -> None: if exceptions: raise TagInvalidError(exceptions=exceptions) + def _validate_object_access( + self, object_type: ObjectType, object_id: int, exceptions: list[Any] + ) -> None: + """Validate that the current user has access to the target object.""" + # Skip base filter so we can distinguish "not found" from "no access" + target_object = to_object_model(object_type, object_id, skip_base_filter=True) + if not target_object: + # Allow operation on stale references; no object to authorize against + return + + try: + if object_type == ObjectType.dashboard: + security_manager.raise_for_access(dashboard=target_object) + elif object_type == ObjectType.chart: + security_manager.raise_for_access(chart=target_object) + elif object_type == ObjectType.query: + security_manager.raise_for_access(query=target_object) + elif object_type == ObjectType.dataset: + security_manager.raise_for_access(datasource=target_object) + else: + exceptions.append( + TaggedObjectDeleteFailedError( + f"Access validation not supported for {object_type}" + ) + ) + except SupersetSecurityException: + exceptions.append( + TaggedObjectDeleteFailedError( + f"Access denied for {object_type} {object_id}" + ) + ) + class DeleteTagsCommand(DeleteMixin, BaseCommand): def __init__(self, tags: list[str]): diff --git a/superset/commands/tag/utils.py b/superset/commands/tag/utils.py index c3929cc41bc0..97237bc2ec9b 100644 --- a/superset/commands/tag/utils.py +++ b/superset/commands/tag/utils.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Optional, Union +from typing import Any, Optional, Union from superset.daos.chart import ChartDAO from superset.daos.dashboard import DashboardDAO @@ -36,12 +36,17 @@ def to_object_type(object_type: Union[ObjectType, int, str]) -> Optional[ObjectT def to_object_model( - object_type: ObjectType, object_id: int -) -> Optional[Union[Dashboard, SavedQuery, Slice]]: + object_type: ObjectType, object_id: int, skip_base_filter: bool = False +) -> Optional[Union[Dashboard, SavedQuery, Slice, Any]]: if ObjectType.dashboard == object_type: - return DashboardDAO.find_by_id(object_id) + return DashboardDAO.find_by_id(object_id, skip_base_filter=skip_base_filter) if ObjectType.query == object_type: - return SavedQueryDAO.find_by_id(object_id) + return SavedQueryDAO.find_by_id(object_id, skip_base_filter=skip_base_filter) if ObjectType.chart == object_type: - return ChartDAO.find_by_id(object_id) + return ChartDAO.find_by_id(object_id, skip_base_filter=skip_base_filter) + if ObjectType.dataset == object_type: + # Imported lazily to avoid a circular import via superset.views.base + from superset.daos.dataset import DatasetDAO + + return DatasetDAO.find_by_id(object_id, skip_base_filter=skip_base_filter) return None diff --git a/superset/databases/api.py b/superset/databases/api.py index f458c558dac4..91a67e6864bb 100644 --- a/superset/databases/api.py +++ b/superset/databases/api.py @@ -1327,6 +1327,7 @@ def related_objects(self, pk: int) -> Response: "viz_type": chart.viz_type, } for chart in data["charts"] + if security_manager.can_access_chart(chart) ] dashboards = [ { @@ -1336,6 +1337,7 @@ def related_objects(self, pk: int) -> Response: "title": dashboard.dashboard_title, } for dashboard in data["dashboards"] + if security_manager.can_access_dashboard(dashboard) ] sqllab_tab_states = [ {"id": tab_state.id, "label": tab_state.label, "active": tab_state.active} diff --git a/superset/datasets/api.py b/superset/datasets/api.py index c44a8ba2e374..cee61509067f 100644 --- a/superset/datasets/api.py +++ b/superset/datasets/api.py @@ -828,6 +828,7 @@ def related_objects(self, id_or_uuid: str) -> Response: "viz_type": chart.viz_type, } for chart in data["charts"] + if security_manager.can_access_chart(chart) ] dashboards = [ { @@ -837,6 +838,7 @@ def related_objects(self, id_or_uuid: str) -> Response: "title": dashboard.dashboard_title, } for dashboard in data["dashboards"] + if security_manager.can_access_dashboard(dashboard) ] return self.response( 200, diff --git a/tests/integration_tests/tags/commands_tests.py b/tests/integration_tests/tags/commands_tests.py index cf340c5b110f..055c51f09adc 100644 --- a/tests/integration_tests/tags/commands_tests.py +++ b/tests/integration_tests/tags/commands_tests.py @@ -39,6 +39,7 @@ from superset.models.slice import Slice # noqa: F401 from superset.tags.models import ObjectType, Tag, TaggedObject, TagType from tests.integration_tests.base_tests import SupersetTestCase +from tests.integration_tests.constants import ADMIN_USERNAME from tests.integration_tests.fixtures.importexport import ( chart_config, # noqa: F401 dashboard_config, # noqa: F401 @@ -62,6 +63,7 @@ class TestCreateCustomTagCommand(SupersetTestCase): @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") @pytest.mark.usefixtures("with_tagging_system_feature") def test_create_custom_tag_command(self): + self.login(ADMIN_USERNAME) example_dashboard = ( db.session.query(Dashboard).filter_by(slug="world_health").one() ) @@ -96,6 +98,7 @@ class TestDeleteTagsCommand(SupersetTestCase): @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") @pytest.mark.usefixtures("with_tagging_system_feature") def test_delete_tags_command(self): + self.login(ADMIN_USERNAME) example_dashboard = ( db.session.query(Dashboard) .filter_by(dashboard_title="World Bank's Data") @@ -130,6 +133,7 @@ class TestDeleteTaggedObjectCommand(SupersetTestCase): @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") @pytest.mark.usefixtures("with_tagging_system_feature") def test_delete_tags_command(self): + self.login(ADMIN_USERNAME) # create tagged objects example_dashboard = ( db.session.query(Dashboard).filter_by(slug="world_health").one() From 61b32d1b7d033d4adb64e6fb97a4881ec64457ea Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:44 +0100 Subject: [PATCH 08/13] fix(chart): standardize dashboard validation across chart create/update (#40336) Co-authored-by: Claude Sonnet 4.6 --- superset/charts/api.py | 2 ++ superset/commands/chart/update.py | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index 9b692c02d9d7..e8f44f3239fd 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -441,6 +441,8 @@ def put(self, pk: int) -> Response: response = self.response_404() except ChartForbiddenError: response = self.response_403() + except DashboardsForbiddenError as ex: + response = self.response(ex.status, message=ex.message) except TagForbiddenError as ex: response = self.response(403, message=str(ex)) except ChartInvalidError as ex: diff --git a/superset/commands/chart/update.py b/superset/commands/chart/update.py index 1af742b37f3c..77679b6fe4bc 100644 --- a/superset/commands/chart/update.py +++ b/superset/commands/chart/update.py @@ -30,6 +30,7 @@ ChartInvalidError, ChartNotFoundError, ChartUpdateFailedError, + DashboardsForbiddenError, DashboardsNotFoundValidationError, DatasourceTypeUpdateRequiredValidationError, ) @@ -76,7 +77,7 @@ def _validate_new_dashboard_access( self, requested_dashboards: list[Dashboard], exceptions: list[ValidationError] ) -> None: """ - Validate user has access to any NEW dashboard relationships. + Validate user has ownership of any NEW dashboard relationships. Existing relationships are preserved to maintain chart ownership rights. """ if not self._model: @@ -86,14 +87,20 @@ def _validate_new_dashboard_access( requested_dashboard_ids = {d.id for d in requested_dashboards} if new_dashboard_ids := requested_dashboard_ids - existing_dashboard_ids: - # For NEW dashboard relationships, verify user has access + # For NEW dashboard relationships, verify user has ownership accessible_dashboards = DashboardDAO.find_by_ids(list(new_dashboard_ids)) - accessible_dashboard_ids = {d.id for d in accessible_dashboards} - unauthorized_dashboard_ids = new_dashboard_ids - accessible_dashboard_ids + unauthorized_dashboard_ids = new_dashboard_ids - { + d.id for d in accessible_dashboards + } if unauthorized_dashboard_ids: exceptions.append(DashboardsNotFoundValidationError()) + # Additional ownership check - must match CreateChartCommand behavior + for dash in accessible_dashboards: + if not security_manager.is_owner(dash): + raise DashboardsForbiddenError() + def validate(self) -> None: # noqa: C901 exceptions: list[ValidationError] = [] dashboard_ids = self._properties.get("dashboards") From 56fd991efde6402107e428934f064e25150289ac Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:50 +0100 Subject: [PATCH 09/13] fix(dataset): unify validation for stored and adhoc SQL expressions (#40392) Co-authored-by: Claude Opus 4.7 --- superset/commands/dataset/update.py | 51 +++++- superset/connectors/sqla/models.py | 75 +++++++- superset/sql/parse.py | 6 + .../commands/dataset/update_test.py | 160 ++++++++++++++++++ .../unit_tests/connectors/sqla/models_test.py | 123 +++++++++++++- tests/unit_tests/sql/parse_tests.py | 20 +++ 6 files changed, 430 insertions(+), 5 deletions(-) diff --git a/superset/commands/dataset/update.py b/superset/commands/dataset/update.py index 3acab024b9e3..ae72494ea675 100644 --- a/superset/commands/dataset/update.py +++ b/superset/commands/dataset/update.py @@ -44,10 +44,14 @@ DatasetUpdateFailedError, MultiCatalogDisabledValidationError, ) -from superset.connectors.sqla.models import SqlaTable +from superset.connectors.sqla.models import SqlaTable, validate_stored_expression from superset.daos.dataset import DatasetDAO from superset.datasets.schemas import FolderSchema -from superset.exceptions import SupersetParseError, SupersetSecurityException +from superset.exceptions import ( + QueryClauseValidationException, + SupersetParseError, + SupersetSecurityException, +) from superset.models.core import Database from superset.sql.parse import Table from superset.utils.decorators import on_error, transaction @@ -212,9 +216,11 @@ def _validate_semantics(self, exceptions: list[ValidationError]) -> None: self._model = cast(SqlaTable, self._model) if columns := self._properties.get("columns"): self._validate_columns(columns, exceptions) + self._validate_expressions(columns, "columns", exceptions) if metrics := self._properties.get("metrics"): self._validate_metrics(metrics, exceptions) + self._validate_expressions(metrics, "metrics", exceptions) if folders := self._properties.get("folders"): valid_uuids: set[UUID] = set() @@ -283,6 +289,47 @@ def _validate_metrics( if not DatasetDAO.validate_metrics_uniqueness(self._model_id, metric_names): exceptions.append(DatasetMetricsExistsValidationError()) + def _validate_expressions( + self, + items: list[dict[str, Any]], + label: str, + exceptions: list[ValidationError], + ) -> None: + """ + Run each item's SQL expression through the parser-based validator that + already governs adhoc expressions, so stored column and metric + expressions cannot smuggle sub-queries, set operations, or + multi-statement SQL into chart queries. + """ + self._model = cast(SqlaTable, self._model) + # `_validate_dataset_source` runs first and rebinds + # `self._properties["database"]` from the request's `database_id` + # to the resolved `Database` model when the user is repointing the + # dataset; otherwise the key is absent and we fall back to the + # currently-bound database on the model. + database = self._properties.get("database") or self._model.database + catalog = self._properties.get("catalog", self._model.catalog) + schema = self._properties.get("schema", self._model.schema) + + for idx, item in enumerate(items): + expression = item.get("expression") + if not expression: + continue + try: + validate_stored_expression(database, catalog, schema, expression) + except (SupersetSecurityException, QueryClauseValidationException) as ex: + message = ( + ex.error.message + if isinstance(ex, SupersetSecurityException) + else ex.message + ) + exceptions.append( + ValidationError( + message, + field_name=f"{label}.{idx}.expression", + ) + ) + @staticmethod def _get_duplicates(data: list[dict[str, Any]], key: str) -> list[str]: duplicates = [ diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index de2d41f38789..83ddbc3fcfe7 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -19,6 +19,7 @@ import builtins import logging +import re from collections import defaultdict from collections.abc import Hashable from dataclasses import dataclass, field @@ -78,11 +79,13 @@ ) from superset.daos.exceptions import DatasourceNotFound from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import ( ColumnNotFoundException, DatasetInvalidPermissionEvaluationException, QueryObjectValidationError, SupersetGenericDBErrorException, + SupersetParseError, SupersetSecurityException, SupersetSyntaxErrorException, ) @@ -101,10 +104,11 @@ ImportExportMixin, QueryResult, SQLA_QUERY_KEYS, + validate_adhoc_subquery, ) from superset.models.slice import Slice from superset.models.sql_types.base import CurrencyType -from superset.sql.parse import Table +from superset.sql.parse import sanitize_clause, SQLStatement, Table from superset.superset_typing import ( AdhocColumn, AdhocMetric, @@ -867,6 +871,75 @@ def values_for_column(self, column_name: str, limit: int = 10000) -> list[Any]: raise NotImplementedError() +_JINJA_BLOCK_RE = re.compile( + r"\{\{.*?\}\}|\{%.*?%\}|\{#.*?#\}", + re.DOTALL, +) + + +def validate_stored_expression( + database: Database, + catalog: str | None, + schema: str | None, + expression: str | None, +) -> None: + """ + Apply the adhoc-expression validator to a stored column or metric expression. + + Wrapping in a synthetic ``SELECT `` reuses the column-position parser + rules already enforced for adhoc expressions, so the same policy on + sub-queries, set operations, and multi-statement SQL applies to stored + expressions when they are saved. + + Balanced Jinja blocks (``{{ ... }}``, ``{% ... %}``, ``{# ... #}``) are + replaced with a numeric placeholder before parsing so the surrounding SQL + is still inspected; structural attacks smuggled in the non-templated + portion of an otherwise-templated expression are still rejected. + Expressions whose substituted skeleton is unparseable (typically due to + ``{% if %}`` control-flow templating) fall back to deferring validation + to query time, when the template processor has a real context. + """ + if not expression: + return + skeleton = _JINJA_BLOCK_RE.sub(" NULL ", expression) + contains_jinja = skeleton != expression + engine = database.backend + wrapped = f"SELECT {skeleton}" + + try: + parsed = SQLStatement(wrapped, engine) + except SupersetParseError as ex: + if contains_jinja: + return + raise SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.ADHOC_SUBQUERY_NOT_ALLOWED_ERROR, + message=_( + "Custom SQL fields cannot be parsed as a single SQL statement." + ), + level=ErrorLevel.ERROR, + ) + ) from ex + + if parsed.is_set_operation(): + raise SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.ADHOC_SUBQUERY_NOT_ALLOWED_ERROR, + message=_("Custom SQL fields cannot contain set operations."), + level=ErrorLevel.ERROR, + ) + ) + + validate_adhoc_subquery( + wrapped, + database, + catalog, + schema or "", + engine, + ) + sanitize_clause(wrapped, engine) + + class TableColumn(AuditMixinNullable, ImportExportMixin, CertificationMixin, Model): """ORM object for table columns, each table can have multiple columns""" diff --git a/superset/sql/parse.py b/superset/sql/parse.py index f2f6b805782a..6f86be194b13 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -952,6 +952,12 @@ def has_subquery(self) -> bool: ) ) + def is_set_operation(self) -> bool: + """ + Check if the statement is a top-level set operation (UNION/INTERSECT/EXCEPT). + """ + return isinstance(self._parsed, exp.SetOperation) + def parse_predicate(self, predicate: str) -> exp.Expression: """ Parse a predicate string into an AST. diff --git a/tests/unit_tests/commands/dataset/update_test.py b/tests/unit_tests/commands/dataset/update_test.py index afc33741c51c..b71718b19154 100644 --- a/tests/unit_tests/commands/dataset/update_test.py +++ b/tests/unit_tests/commands/dataset/update_test.py @@ -240,6 +240,166 @@ def test_update_dataset_validation_errors( assert any(error_msg in str(exc) for exc in excinfo.value._exceptions) +@pytest.mark.parametrize( + ("payload", "field"), + [ + ( + { + "columns": [ + { + "column_name": "evil_col", + "expression": "1; DROP TABLE users", + } + ] + }, + "columns.0.expression", + ), + ( + { + "metrics": [ + { + "metric_name": "evil_metric", + "expression": "1 UNION SELECT password FROM ab_user", + } + ] + }, + "metrics.0.expression", + ), + ], +) +def test_update_dataset_rejects_malicious_expression( + payload: dict[str, Any], + field: str, + mocker: MockerFixture, +) -> None: + """ + Stored column and metric ``expression`` strings are routed through the + same validator as adhoc SQL fields, and command-level validation + surfaces the parser's verdict as a field-level ``ValidationError``. + """ + mock_dataset_dao = mocker.patch("superset.commands.dataset.update.DatasetDAO") + mocker.patch( + "superset.commands.dataset.update.security_manager.raise_for_ownership", + ) + mocker.patch("superset.commands.utils.security_manager.is_admin", return_value=True) + mocker.patch( + "superset.commands.utils.security_manager.get_user_by_id", return_value=None + ) + mock_database = mocker.MagicMock() + mock_database.id = 1 + mock_database.backend = "sqlite" + mock_database.allow_multi_catalog = False + mock_database.get_default_catalog.return_value = "catalog" + mock_dataset = mocker.MagicMock() + mock_dataset.database = mock_database + mock_dataset.catalog = "catalog" + mock_dataset.schema = None + mock_dataset_dao.find_by_id.return_value = mock_dataset + mock_dataset_dao.get_database_by_id.return_value = mock_database + mock_dataset_dao.validate_update_uniqueness.return_value = True + mock_dataset_dao.validate_columns_exist.return_value = True + mock_dataset_dao.validate_columns_uniqueness.return_value = True + mock_dataset_dao.validate_metrics_exist.return_value = True + mock_dataset_dao.validate_metrics_uniqueness.return_value = True + + with pytest.raises(DatasetInvalidError) as excinfo: + UpdateDatasetCommand(1, payload).run() + expression_errors = [ + exc + for exc in excinfo.value._exceptions + if isinstance(exc, ValidationError) and field in (exc.field_name or "") + ] + assert expression_errors, ( + f"Expected a field-level ValidationError on '{field}'. Got: " + f"{[(type(e).__name__, getattr(e, 'field_name', None), str(e)) for e in excinfo.value._exceptions]}" # noqa: E501 + ) + + +def test_update_dataset_accepts_benign_expression(mocker: MockerFixture) -> None: + """ + A well-formed stored expression (e.g. CASE) passes the validator: the + command's ``validate()`` collects no expression-level errors. We + invoke ``validate()`` directly to isolate the validator from the + rest of the run-path (commit, audit, etc.). + """ + mock_dataset_dao = mocker.patch("superset.commands.dataset.update.DatasetDAO") + mocker.patch( + "superset.commands.dataset.update.security_manager.raise_for_ownership", + ) + mocker.patch("superset.commands.utils.security_manager.is_admin", return_value=True) + mocker.patch( + "superset.commands.utils.security_manager.get_user_by_id", return_value=None + ) + mock_database = mocker.MagicMock() + mock_database.id = 1 + mock_database.backend = "sqlite" + mock_database.allow_multi_catalog = False + mock_database.get_default_catalog.return_value = "catalog" + mock_dataset = mocker.MagicMock() + mock_dataset.database = mock_database + mock_dataset.catalog = "catalog" + mock_dataset.schema = None + mock_dataset_dao.find_by_id.return_value = mock_dataset + mock_dataset_dao.get_database_by_id.return_value = mock_database + mock_dataset_dao.validate_update_uniqueness.return_value = True + mock_dataset_dao.validate_columns_exist.return_value = True + mock_dataset_dao.validate_columns_uniqueness.return_value = True + + payload = { + "columns": [ + { + "column_name": "case_col", + "expression": "CASE WHEN amount > 0 THEN 'a' ELSE 'b' END", + } + ] + } + UpdateDatasetCommand(1, payload).validate() + + +def test_update_dataset_accepts_jinja_expression(mocker: MockerFixture) -> None: + """ + Stored column/metric expressions can use Jinja templating (e.g. + ``{{ current_username() }}``). At save time there is no template + context, so the parser-based gate is bypassed; the same validator + re-runs on the rendered SQL at query time. + """ + mock_dataset_dao = mocker.patch("superset.commands.dataset.update.DatasetDAO") + mocker.patch( + "superset.commands.dataset.update.security_manager.raise_for_ownership", + ) + mocker.patch("superset.commands.utils.security_manager.is_admin", return_value=True) + mocker.patch( + "superset.commands.utils.security_manager.get_user_by_id", return_value=None + ) + mock_database = mocker.MagicMock() + mock_database.id = 1 + mock_database.backend = "sqlite" + mock_database.allow_multi_catalog = False + mock_database.get_default_catalog.return_value = "catalog" + mock_dataset = mocker.MagicMock() + mock_dataset.database = mock_database + mock_dataset.catalog = "catalog" + mock_dataset.schema = None + mock_dataset_dao.find_by_id.return_value = mock_dataset + mock_dataset_dao.get_database_by_id.return_value = mock_database + mock_dataset_dao.validate_update_uniqueness.return_value = True + mock_dataset_dao.validate_columns_exist.return_value = True + mock_dataset_dao.validate_columns_uniqueness.return_value = True + + payload = { + "columns": [ + { + "column_name": "user_match", + "expression": ( + "case when '{{ current_username() }}' = 'abc' " + "then 'yes' else 'no' end" + ), + } + ] + } + UpdateDatasetCommand(1, payload).validate() + + @with_feature_flags(DATASET_FOLDERS=True) def test_validate_folders(mocker: MockerFixture) -> None: """ diff --git a/tests/unit_tests/connectors/sqla/models_test.py b/tests/unit_tests/connectors/sqla/models_test.py index a8af52b2dfd0..28d5cc39d399 100644 --- a/tests/unit_tests/connectors/sqla/models_test.py +++ b/tests/unit_tests/connectors/sqla/models_test.py @@ -22,10 +22,17 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.session import Session -from superset.connectors.sqla.models import SqlaTable, TableColumn +from superset.connectors.sqla.models import ( + SqlaTable, + TableColumn, + validate_stored_expression, +) from superset.daos.dataset import DatasetDAO from superset.daos.exceptions import DatasourceNotFound -from superset.exceptions import OAuth2RedirectError +from superset.exceptions import ( + OAuth2RedirectError, + SupersetSecurityException, +) from superset.models.core import Database from superset.sql.parse import Table from superset.superset_typing import QueryObjectDict @@ -977,3 +984,115 @@ def test_owners_data_includes_email(mocker: MockerFixture) -> None: "id": 1, "email": "john@example.com", } + + +def _database_for_expression(mocker: MockerFixture) -> Database: + database = mocker.MagicMock(spec=Database) + database.backend = "sqlite" + database.allow_multi_catalog = False + return database + + +def test_validate_stored_expression_rejects_multi_statement( + mocker: MockerFixture, +) -> None: + database = _database_for_expression(mocker) + with pytest.raises(SupersetSecurityException): + validate_stored_expression(database, None, None, "1; DROP TABLE users") + + +def test_validate_stored_expression_rejects_set_operation( + mocker: MockerFixture, +) -> None: + database = _database_for_expression(mocker) + with pytest.raises(SupersetSecurityException): + validate_stored_expression( + database, None, None, "1 UNION SELECT password FROM ab_user" + ) + + +def test_validate_stored_expression_accepts_case_expression( + mocker: MockerFixture, +) -> None: + database = _database_for_expression(mocker) + validate_stored_expression( + database, None, None, "CASE WHEN amount > 0 THEN 'a' ELSE 'b' END" + ) + + +def test_validate_stored_expression_rejects_subquery( + mocker: MockerFixture, +) -> None: + """ + With ``ALLOW_ADHOC_SUBQUERY=False`` (the default), a stored + expression that contains a sub-query is rejected by the same + ``validate_adhoc_subquery`` gate that already covers adhoc SQL. + Locks in the sub-query branch so a future refactor that + removes the ``validate_adhoc_subquery`` call gets a red test. + """ + database = _database_for_expression(mocker) + mocker.patch("superset.models.helpers.is_feature_enabled", return_value=False) + with pytest.raises(SupersetSecurityException): + validate_stored_expression( + database, + None, + None, + "(SELECT password FROM ab_user LIMIT 1)", + ) + + +@pytest.mark.parametrize( + "expression", + [ + "case when '{{ current_username() }}' = 'abc' then 'yes' else 'no' end", + "SUM(price) * {{ url_param('multiplier') }}", + "{# comment #} amount", + "{% if 1 %}amount{% endif %}", + ], +) +def test_validate_stored_expression_accepts_jinja( + mocker: MockerFixture, expression: str +) -> None: + """ + Stored expressions can contain Jinja templating. Balanced Jinja blocks + are replaced with a placeholder so the surrounding SQL is still parsed; + skeletons whose control flow leaves them unparseable defer to runtime. + """ + database = _database_for_expression(mocker) + validate_stored_expression(database, None, None, expression) + + +def test_validate_stored_expression_rejects_set_op_around_jinja( + mocker: MockerFixture, +) -> None: + """ + A ``UNION`` smuggled around a Jinja block must still be rejected: the + Jinja substitution leaves the set operator visible to the parser. + """ + database = _database_for_expression(mocker) + with pytest.raises(SupersetSecurityException): + validate_stored_expression( + database, + None, + None, + "'{{ current_username() }}' UNION SELECT password FROM ab_user", + ) + + +def test_validate_stored_expression_rejects_subquery_around_jinja( + mocker: MockerFixture, +) -> None: + """ + Sub-queries combined with a Jinja comment block must still be rejected: + stripping the ``{# ... #}`` block leaves the sub-query visible to the + ``validate_adhoc_subquery`` gate. + """ + database = _database_for_expression(mocker) + mocker.patch("superset.models.helpers.is_feature_enabled", return_value=False) + with pytest.raises(SupersetSecurityException): + validate_stored_expression( + database, + None, + None, + "(SELECT password FROM ab_user LIMIT 1) {# x #}", + ) diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py index 7fd7e6a6f3e8..12248d3cec50 100644 --- a/tests/unit_tests/sql/parse_tests.py +++ b/tests/unit_tests/sql/parse_tests.py @@ -1440,6 +1440,26 @@ def test_has_destructive(sql: str, expected: bool) -> None: assert SQLScript(sql, "postgresql").has_destructive() == expected +@pytest.mark.parametrize( + "sql, expected", + [ + ("SELECT 1 UNION SELECT 2", True), + ("SELECT 1 UNION ALL SELECT 2", True), + ("SELECT 1 INTERSECT SELECT 2", True), + ("SELECT 1 EXCEPT SELECT 2", True), + ("SELECT 1", False), + ("WITH cte AS (SELECT 1) SELECT * FROM cte", False), + ("SELECT * FROM (SELECT 1 UNION SELECT 2) AS sub", False), + ], +) +def test_is_set_operation(sql: str, expected: bool) -> None: + """ + Test that ``is_set_operation`` detects top-level UNION/INTERSECT/EXCEPT + but not nested set operations inside a sub-query. + """ + assert SQLStatement(sql, "postgresql").is_set_operation() == expected + + @pytest.mark.parametrize( "kql, expected", [ From 77c2bed5f723c91ab420dbfbc364dfabeced6355 Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:55:57 +0100 Subject: [PATCH 10/13] fix(dashboards): narrow datasets payload to callers with read access (#40396) Co-authored-by: Claude Sonnet 4 --- superset/daos/dashboard.py | 2 +- superset/dashboards/api.py | 21 ++++++++++++++- superset/models/dashboard.py | 8 +++--- .../integration_tests/dashboards/api_tests.py | 26 +++++++++++++++++++ 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/superset/daos/dashboard.py b/superset/daos/dashboard.py index 5377734ecde2..b4fcba6888b5 100644 --- a/superset/daos/dashboard.py +++ b/superset/daos/dashboard.py @@ -161,7 +161,7 @@ def get_by_id_or_slug(cls, id_or_slug: int | str) -> Dashboard: return dashboard @staticmethod - def get_datasets_for_dashboard(id_or_slug: str) -> list[Any]: + def get_datasets_for_dashboard(id_or_slug: str) -> list[tuple[Any, dict[str, Any]]]: dashboard = DashboardDAO.get_by_id_or_slug(id_or_slug) return dashboard.datasets_trimmed_for_slices() diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index 5220da680a0f..ff0c2a89c668 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -570,12 +570,31 @@ def get_datasets(self, id_or_slug: str) -> Response: try: datasets = DashboardDAO.get_datasets_for_dashboard(id_or_slug) result = [ - self.dashboard_dataset_schema.dump(dataset) for dataset in datasets + self._serialize_dashboard_dataset(datasource, payload) + for datasource, payload in datasets ] return self.response(200, result=result) except (TypeError, ValueError) as err: raise DatasetValidationError(err) from err + def _serialize_dashboard_dataset( + self, datasource: Any, payload: dict[str, Any] + ) -> dict[str, Any]: + serialized = self.dashboard_dataset_schema.dump(payload) + if not security_manager.can_access_datasource(datasource): + for key in ( + "sql", + "select_star", + "fetch_values_predicate", + "template_params", + "params", + ): + serialized.pop(key, None) + for collection_key in ("columns", "metrics"): + for item in serialized.get(collection_key) or (): + item.pop("expression", None) + return serialized + @expose("//tabs", methods=("GET",)) @protect() @safe diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 32991820776b..4653272fcbf3 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -267,13 +267,15 @@ def data(self) -> dict[str, Any]: "is_managed_externally": self.is_managed_externally, } - def datasets_trimmed_for_slices(self) -> list[dict[str, Any]]: + def datasets_trimmed_for_slices( + self, + ) -> list[tuple[BaseDatasource, dict[str, Any]]]: slices_by_datasource: dict[int, set[Slice]] = defaultdict(set) for slc in self.slices: slices_by_datasource[slc.datasource_id].add(slc) - result: list[dict[str, Any]] = [] + result: list[tuple[BaseDatasource, dict[str, Any]]] = [] for _, slices in slices_by_datasource.items(): # Use the eagerly-loaded datasource from any slice in the group @@ -281,7 +283,7 @@ def datasets_trimmed_for_slices(self) -> list[dict[str, Any]]: if datasource: # Filter out unneeded fields from the datasource payload - result.append(datasource.data_for_slices(list(slices))) + result.append((datasource, datasource.data_for_slices(list(slices)))) return result diff --git a/tests/integration_tests/dashboards/api_tests.py b/tests/integration_tests/dashboards/api_tests.py index d3f2ed68c0e8..f00f5fff930b 100644 --- a/tests/integration_tests/dashboards/api_tests.py +++ b/tests/integration_tests/dashboards/api_tests.py @@ -295,6 +295,7 @@ def test_get_dashboard_datasets(self, logger_mock): assert actual_dataset_ids == expected_dataset_ids expected_values = [0, 1] if backend() == "presto" else [0, 1, 2] assert result[0]["column_types"] == expected_values + assert "sql" in result[0] logger_mock.warning.assert_not_called() @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") @@ -315,6 +316,31 @@ def test_get_dashboard_datasets_as_guest(self, is_guest_user, has_guest_access): for excluded_key in ["database", "owners"]: assert excluded_key not in dataset + @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") + @patch("superset.dashboards.api.security_manager.can_access_datasource") + def test_get_dashboard_datasets_strips_definition_without_datasource_access( + self, can_access_datasource_mock + ): + can_access_datasource_mock.return_value = False + self.login(ADMIN_USERNAME) + uri = "api/v1/dashboard/world_health/datasets" + response = self.get_assert_metric(uri, "get_datasets") + assert response.status_code == 200 + data = json.loads(response.data.decode("utf-8")) + for dataset in data["result"]: + for excluded_key in [ + "sql", + "select_star", + "fetch_values_predicate", + "template_params", + "params", + ]: + assert excluded_key not in dataset + for column in dataset.get("columns") or []: + assert "expression" not in column + for metric in dataset.get("metrics") or []: + assert "expression" not in metric + @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices") @patch("superset.utils.log.logger") def test_get_dashboard_datasets_not_found(self, logger_mock): From b8a2f925ee4b9401084b4ca238be9909f7a62f4f Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:56:03 +0100 Subject: [PATCH 11/13] fix(views): enforce per-chart access check in legacy form_data endpoint (#40497) Co-authored-by: Claude Opus 4.7 --- superset/views/api.py | 19 ++++++--- tests/integration_tests/charts/api_tests.py | 46 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/superset/views/api.py b/superset/views/api.py index c24a102d65e4..e1c36bf71f3f 100644 --- a/superset/views/api.py +++ b/superset/views/api.py @@ -24,13 +24,14 @@ from flask_appbuilder.security.decorators import has_access_api from flask_babel import lazy_gettext as _ -from superset import db, event_logger +from superset import event_logger from superset.commands.chart.exceptions import ( + ChartNotFoundError, TimeRangeAmbiguousError, TimeRangeParseFailError, ) +from superset.daos.chart import ChartDAO from superset.legacy import update_time_range -from superset.models.slice import Slice from superset.superset_typing import FlaskResponse from superset.utils import json from superset.utils.date_parser import get_since_until @@ -85,11 +86,17 @@ def query_form_data(self) -> FlaskResponse: Get the form_data stored in the database for existing slice. params: slice_id: integer """ - form_data = {} + form_data: dict[str, Any] = {} if slice_id := request.args.get("slice_id"): - slc = db.session.query(Slice).filter_by(id=slice_id).one_or_none() - if slc: - form_data = slc.form_data.copy() + # Reuse ChartDAO.get_by_id_or_uuid so this endpoint applies the + # same ChartFilter (dataset-scoped) as ChartRestApi.get. Both a + # missing chart and a chart the caller cannot access surface as + # ChartNotFoundError, mapped to 404 so the status code cannot be + # used to distinguish the two cases. + try: + form_data = ChartDAO.get_by_id_or_uuid(slice_id).form_data.copy() + except ChartNotFoundError: + return self.json_response({}, 404) update_time_range(form_data) diff --git a/tests/integration_tests/charts/api_tests.py b/tests/integration_tests/charts/api_tests.py index 699a126f6b89..47fc27126255 100644 --- a/tests/integration_tests/charts/api_tests.py +++ b/tests/integration_tests/charts/api_tests.py @@ -1769,6 +1769,52 @@ def test_query_form_data(self): if slice: assert data["slice_id"] == slice.id + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_query_form_data_no_data_access(self): + """ + Chart API: query_form_data must refuse callers without + datasource_access on the chart's underlying dataset. Mirrors + the existing test_get_chart_no_data_access guard on + ChartRestApi (which also returns 404 to avoid leaking chart + existence to unauthorised callers). + """ + self.login(GAMMA_USERNAME) + chart_no_access = ( + db.session.query(Slice) + .filter_by(slice_name="Girl Name Cloud") + .one_or_none() + ) + assert chart_no_access is not None, ( + "fixture load_birth_names_dashboard_with_slices did not " + "create the 'Girl Name Cloud' slice" + ) + uri = f"api/v1/form_data/?slice_id={chart_no_access.id}" + rv = self.client.get(uri) + # Match ChartRestApi.get: 404 for both missing AND forbidden so + # the endpoint cannot be used to enumerate chart IDs. + assert rv.status_code == 404, ( + f"Gamma user without datasource_access should get 404 " + f"(status={rv.status_code}, body={rv.data[:200]!r})" + ) + # Defence in depth: even if a future regression returns a non- + # 200 status with a partially-filled error envelope, ensure the + # caller cannot recover form_data fields. + assert b"datasource" not in rv.data + assert b"adhoc_filters" not in rv.data + assert b"viz_type" not in rv.data + + def test_query_form_data_missing_slice(self): + """ + Chart API: a non-existent slice_id must return the same 404 as a + forbidden one, so the status code cannot be used to enumerate + which slice IDs exist. + """ + self.login(ADMIN_USERNAME) + max_id = db.session.query(func.max(Slice.id)).scalar() or 0 + uri = f"api/v1/form_data/?slice_id={max_id + 10_000}" + rv = self.client.get(uri) + assert rv.status_code == 404 + @pytest.mark.usefixtures( "load_unicode_dashboard_with_slice", "load_energy_table_with_slice", From e3ba85b1a5eea188f1121aed50d0d995692b3340 Mon Sep 17 00:00:00 2001 From: Shaitan <105581038+sha174n@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:56:10 +0100 Subject: [PATCH 12/13] fix(redirect): normalize browser-stripped whitespace before protocol-relative check (#40566) Co-authored-by: Claude Opus 4.7 --- superset/utils/link_redirect.py | 14 +++++++++- tests/unit_tests/utils/test_link_redirect.py | 29 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/superset/utils/link_redirect.py b/superset/utils/link_redirect.py index b95f262f3399..8707ae28ec0f 100644 --- a/superset/utils/link_redirect.py +++ b/superset/utils/link_redirect.py @@ -118,6 +118,14 @@ def process_html_links(html_content: str) -> str: return html_content +# Characters that browsers remove from URLs during parsing per the WHATWG +# URL spec (TAB, LF, CR). Both literal and percent-encoded forms must be +# stripped before any structural check, otherwise a path like +# ``/%09///host`` slips past a leading-``//`` guard because the percent- +# encoded TAB only disappears after the browser parses the URL. +_URL_STRIPPED_CONTROL_CHARS = re.compile(r"[\t\n\r]|%09|%0[ADad]") + + def is_safe_redirect_url(url: str) -> bool: """ Return True if *url* is an internal Superset URL (safe to redirect to @@ -126,7 +134,11 @@ def is_safe_redirect_url(url: str) -> bool: if not url or not url.strip(): return False - stripped = url.strip() + # Normalize the URL the same way a browser will before parsing: drop + # the TAB/LF/CR characters that the WHATWG URL parser removes, plus + # their percent-encoded forms (which some browsers also strip when + # following a Location header). + stripped = _URL_STRIPPED_CONTROL_CHARS.sub("", url.strip()) # Block protocol-relative URLs if stripped.startswith("//") or stripped.startswith("\\\\"): diff --git a/tests/unit_tests/utils/test_link_redirect.py b/tests/unit_tests/utils/test_link_redirect.py index c02eaa04a9bf..bad2658d59ac 100644 --- a/tests/unit_tests/utils/test_link_redirect.py +++ b/tests/unit_tests/utils/test_link_redirect.py @@ -141,3 +141,32 @@ def test_unsafe_no_config(app: Flask) -> None: app.config["WEBDRIVER_BASEURL"] = "" app.config["WEBDRIVER_BASEURL_USER_FRIENDLY"] = "" assert not is_safe_redirect_url("https://anything.com") + + +@pytest.mark.parametrize( + "url", + [ + "/%09///evil.com", # tab percent-encoded + "/%0a///evil.com", # LF percent-encoded + "/%0A///evil.com", + "/%0d///evil.com", # CR percent-encoded + "/%0D///evil.com", + "/\t///evil.com", # literal tab + "/\n///evil.com", # literal LF + "/\r///evil.com", # literal CR + "/%09/%09//evil.com", # multiple TABs + "/\t\t//evil.com", + ], +) +def test_unsafe_url_with_browser_stripped_whitespace(app: Flask, url: str) -> None: + """Browsers per the WHATWG URL spec strip TAB/LF/CR from URLs before + parsing, so a path like ``/%09///host`` is navigated to ``//host`` + (a protocol-relative URL) even though the unprocessed string does not + start with ``//``. The check must normalize the input the same way.""" + assert not is_safe_redirect_url(url) + + +def test_safe_path_with_tab_in_internal_segment(app: Flask) -> None: + """A tab inside a regular path segment is still a relative URL after + stripping; it must not flip the result to safe-then-unsafe.""" + assert is_safe_redirect_url("/dashboard/1?from=tab%09inside") From c914df5a67c9c53ece3fd41fe618dbe3aea08690 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Wed, 3 Jun 2026 05:53:24 -0700 Subject: [PATCH 13/13] ci: harden CI against Docker Hub registry flakes (retries + auth) (#40700) Co-authored-by: Claude Opus 4.8 --- .github/workflows/check-python-deps.yml | 13 +++++++++++ .../superset-python-integrationtest.yml | 17 ++++++++++++++ scripts/uv-pip-compile.sh | 23 ++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-python-deps.yml b/.github/workflows/check-python-deps.yml index 0201ea817cda..ca1d3eb20b68 100644 --- a/.github/workflows/check-python-deps.yml +++ b/.github/workflows/check-python-deps.yml @@ -38,6 +38,19 @@ jobs: if: steps.check.outputs.python uses: ./.github/actions/setup-backend/ + # Authenticate the Docker daemon so the python:slim pull in + # uv-pip-compile.sh uses our (much higher) authenticated rate limit + # instead of the shared-runner anonymous one. Best-effort: on fork PRs the + # secrets are unavailable, so this no-ops and the pull falls back to + # anonymous (covered by the retry loop in the script). + - name: Login to Docker Hub + if: steps.check.outputs.python + continue-on-error: true + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Run uv if: steps.check.outputs.python run: ./scripts/uv-pip-compile.sh diff --git a/.github/workflows/superset-python-integrationtest.yml b/.github/workflows/superset-python-integrationtest.yml index 580d87fcaaf2..5cfcc056fb65 100644 --- a/.github/workflows/superset-python-integrationtest.yml +++ b/.github/workflows/superset-python-integrationtest.yml @@ -27,6 +27,11 @@ jobs: services: mysql: image: mysql:8.0 + # Authenticated pulls use our higher Docker Hub rate limit. Empty on + # fork PRs (secrets unavailable) -> runner falls back to anonymous. + credentials: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} env: MYSQL_ROOT_PASSWORD: root ports: @@ -38,6 +43,9 @@ jobs: --health-retries=5 redis: image: redis:7-alpine + credentials: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} options: --entrypoint redis-server ports: - 16379:6379 @@ -121,6 +129,9 @@ jobs: services: postgres: image: postgres:17-alpine + credentials: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} env: POSTGRES_USER: superset POSTGRES_PASSWORD: superset @@ -130,6 +141,9 @@ jobs: - 15432:5432 redis: image: redis:7-alpine + credentials: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} ports: - 16379:6379 steps: @@ -186,6 +200,9 @@ jobs: services: redis: image: redis:7-alpine + credentials: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} ports: - 16379:6379 steps: diff --git a/scripts/uv-pip-compile.sh b/scripts/uv-pip-compile.sh index 4f715fe3fd44..a3ba7870adcc 100755 --- a/scripts/uv-pip-compile.sh +++ b/scripts/uv-pip-compile.sh @@ -31,11 +31,32 @@ if [ -z "$RUNNING_IN_DOCKER" ]; then echo "Running in Docker (Python ${PYTHON_VERSION} on Linux)..." + IMAGE="python:${PYTHON_VERSION}-slim" + + # Pre-pull the image with a few retries to absorb transient Docker Hub + # registry failures ("context deadline exceeded" / anonymous rate-limit blips + # on shared CI runners). Without this a flaky pull fails the whole + # check-python-deps job on an infrastructure hiccup rather than a real + # dependency drift. The pull is in the `until` condition so `set -e` does not + # abort on an individual failed attempt. + attempt=1 + max_attempts=4 + until docker pull "$IMAGE"; do + if [ "$attempt" -ge "$max_attempts" ]; then + echo "docker pull $IMAGE failed after ${max_attempts} attempts" >&2 + exit 1 + fi + delay=$((attempt * 10)) + echo "docker pull $IMAGE failed (attempt ${attempt}/${max_attempts}); retrying in ${delay}s..." >&2 + sleep "$delay" + attempt=$((attempt + 1)) + done + docker run --rm \ -v "$(pwd)":/app \ -w /app \ -e RUNNING_IN_DOCKER=1 \ - python:${PYTHON_VERSION}-slim \ + "$IMAGE" \ bash -c "pip install uv && ./scripts/uv-pip-compile.sh $*" exit $?