diff --git a/.github/workflows/bashlib.sh b/.github/workflows/bashlib.sh index 569d3b0a22cb..b7be35f7585f 100644 --- a/.github/workflows/bashlib.sh +++ b/.github/workflows/bashlib.sh @@ -20,10 +20,6 @@ set -e GITHUB_WORKSPACE=${GITHUB_WORKSPACE:-.} ASSETS_MANIFEST="$GITHUB_WORKSPACE/superset/static/assets/manifest.json" -# Rounded job start time, used to create a unique Cypress build id for -# parallelization so we can manually rerun a job after 20 minutes -NONCE=$(echo "$(date "+%Y%m%d%H%M") - ($(date +%M)%20)" | bc) - # Echo only when not in parallel mode say() { if [[ $(echo "$INPUT_PARALLEL" | tr '[:lower:]' '[:upper:]') != 'TRUE' ]]; then diff --git a/SECURITY.md b/SECURITY.md index e8ecfe56aec2..a038c9a8d882 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -109,7 +109,7 @@ If yes, it is in scope. If no, it is out of scope. The lists below apply that te - Any action an Admin role can perform through documented configuration, API, or UI. The Admin role is a trusted operational principal by policy. Per MITRE CNA Operational Rules 4.1, a qualifying vulnerability must violate a security policy; behavior within a documented trust boundary does not. - Deployment or operator decisions: the values of secrets and tokens, whether internal networks are reachable from the server, which database connectors or cache backends are enabled, which feature flags are set, where notifications are delivered, and which third-party plugins are loaded. - Compromise, modification, or malicious control of trusted backend infrastructure. Apache Superset assumes the integrity of its metastore, cache backends (for example Redis or Memcached), message brokers, secret stores, and other operator-managed infrastructure. Findings that require an attacker to read from, write to, or otherwise tamper with these systems, including injecting malicious state, serialized objects, cache entries, task metadata, configuration, or database records, are post-compromise scenarios and do not constitute vulnerabilities in Apache Superset itself. A finding remains in scope only if an unprivileged user can cause such modification through a vulnerability in Apache Superset. -- Code paths whose intended purpose is example data, demos, fixtures, local development, or documentation, rather than the production runtime. +- The continued presence of expired key-value or metastore-cache entries that have not yet been deleted from the metadata database. Such entries are excluded from reads once expired, are purged opportunistically on write, and are removed in bulk by the scheduled `prune_key_value` maintenance task; their lingering until purged is an eventual-cleanup property, not a security boundary, and does not constitute a vulnerability. - How a downstream application (spreadsheet program, email client, browser handling user-downloaded files) interprets output Apache Superset produced for it. - Findings without a reproducible proof of concept against a supported release. The burden of demonstrating exploitability rests with the reporter; findings closed for lack of a proof of concept may be refiled if one is later produced. - Brute force, rate limiting, denial of service, or resource exhaustion that does not bypass a documented control. diff --git a/UPDATING.md b/UPDATING.md index ea73f6adf630..f5ee4f80c342 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,10 @@ assists people when migrating to a new version. ## Next +### YDB now uses a native sqlglot dialect + +YDB SQL parsing now relies on the dedicated [`ydb-sqlglot-plugin`](https://pypi.org/project/ydb-sqlglot-plugin/) dialect, which registers itself with sqlglot automatically. YDB users must install this plugin (e.g., via `pip install "apache-superset[ydb]"`) to avoid a `ValueError` when Superset parses YDB queries. + ### Dataset import validates catalog against the target connection Importing a dataset now validates the `catalog` field against the target database connection. When the connection has multi-catalog disabled (`allow_multi_catalog` off) and the dataset's catalog is not the connection's default catalog, the import fails instead of silently persisting the non-default catalog. This matches the validation already enforced on the dataset update path and prevents imported datasets from querying an unintended database. diff --git a/pyproject.toml b/pyproject.toml index 38dbe8508e7f..2412e04f14c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -208,7 +208,7 @@ netezza = ["nzalchemy>=11.0.2"] starrocks = ["starrocks>=1.0.0"] doris = ["pydoris>=1.0.0, <2.0.0"] oceanbase = ["oceanbase_py>=0.0.1"] -ydb = ["ydb-sqlalchemy>=0.1.2"] +ydb = ["ydb-sqlalchemy>=0.1.2", "ydb-sqlglot-plugin>=0.2.5"] development = [ # no bounds for apache-superset-extensions-cli until a stable version "apache-superset-extensions-cli", diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx index ea24e0c8c50e..54823c1e5ce6 100644 --- a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx +++ b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx @@ -33,6 +33,35 @@ describe('sanitizeHtml', () => { const sanitizedString = sanitizeHtml(htmlString); expect(sanitizedString).not.toContain('script'); }); + + test('should preserve allowed presentational CSS properties', () => { + const htmlString = + '
x
'; + const sanitizedString = sanitizeHtml(htmlString); + expect(sanitizedString).toContain('color:red'); + expect(sanitizedString).toContain('background-color:blue'); + expect(sanitizedString).toContain('font-size:12px'); + expect(sanitizedString).toContain('text-align:center'); + }); + + test('should strip layout and positioning CSS properties', () => { + const htmlString = + '
x
'; + const sanitizedString = sanitizeHtml(htmlString); + expect(sanitizedString).toContain('color:red'); + expect(sanitizedString).not.toContain('position'); + expect(sanitizedString).not.toContain('z-index'); + expect(sanitizedString).not.toContain('width'); + expect(sanitizedString).not.toContain('height'); + }); + + test('should strip unsafe CSS property values', () => { + const htmlString = + '
x
'; + const sanitizedString = sanitizeHtml(htmlString); + expect(sanitizedString).not.toContain('javascript'); + expect(sanitizedString).not.toContain('url('); + }); }); describe('isProbablyHTML', () => { diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx index 940fc232b6ac..8e5e8e482971 100644 --- a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx +++ b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx @@ -19,6 +19,50 @@ import { FilterXSS, getDefaultWhiteList } from 'xss'; import { DataRecordValue } from '../types'; +// Restrict inline `style` attributes to a small set of presentational CSS +// properties. Overlay/positioning properties (e.g. position, z-index, top, +// left, transform) and sizing properties that could cover the page (e.g. +// width, height) are intentionally excluded so that sanitized markup cannot +// escape its container to overlay or obscure the surrounding page. The +// allowlisted spacing/border properties (margin, padding, border) can still +// affect layout within the container, which is acceptable. The `xss` library +// also validates property values against this allowlist, stripping unsupported +// constructs such as url()/expression(). +const allowedCssProperties = { + color: true, + 'background-color': true, + 'text-align': true, + 'text-decoration': true, + 'font-family': true, + 'font-size': true, + 'font-style': true, + 'font-weight': true, + 'line-height': true, + 'letter-spacing': true, + 'white-space': true, + padding: true, + 'padding-top': true, + 'padding-right': true, + 'padding-bottom': true, + 'padding-left': true, + margin: true, + 'margin-top': true, + 'margin-right': true, + 'margin-bottom': true, + 'margin-left': true, + border: true, + 'border-color': true, + 'border-style': true, + 'border-width': true, + 'border-radius': true, + 'vertical-align': true, + // Needed by ECharts tooltips for row transparency and text truncation. + opacity: true, + 'max-width': true, + overflow: true, + 'text-overflow': true, +}; + const xssFilter = new FilterXSS({ whiteList: { ...getDefaultWhiteList(), @@ -45,7 +89,7 @@ const xssFilter = new FilterXSS({ tfoot: ['align', 'valign', 'style'], }, stripIgnoreTag: true, - css: false, + css: { whiteList: allowedCssProperties }, }); export function sanitizeHtml(htmlString: string) { diff --git a/superset-frontend/packages/superset-ui-core/test/utils/tooltip.test.ts b/superset-frontend/packages/superset-ui-core/test/utils/tooltip.test.ts index 1cb708bccdb2..55ea272167a0 100644 --- a/superset-frontend/packages/superset-ui-core/test/utils/tooltip.test.ts +++ b/superset-frontend/packages/superset-ui-core/test/utils/tooltip.test.ts @@ -161,10 +161,13 @@ test('should preserve table styling after sanitization (fixes ECharts tooltip fo `; + // The `xss` CSS filter normalizes declarations, dropping the space after + // each colon (e.g. `opacity: 0.8;` becomes `opacity:0.8;`) while preserving + // the property/value pairs themselves. const sanitized = sanitizeHtml(tableWithStyles); - expect(sanitized).toContain('style="opacity: 0.8;"'); - expect(sanitized).toContain('style="text-align: left; padding-left: 0px;"'); - expect(sanitized).toContain('style="text-align: right; padding-left: 16px;"'); + expect(sanitized).toContain('style="opacity:0.8;"'); + expect(sanitized).toContain('style="text-align:left; padding-left:0px;"'); + expect(sanitized).toContain('style="text-align:right; padding-left:16px;"'); const data = [ ['Metric', 'Value'], @@ -172,10 +175,10 @@ test('should preserve table styling after sanitization (fixes ECharts tooltip fo ]; const html = tooltipHtml(data, 'Test Tooltip'); - expect(html).toContain('style="opacity: 0.8;"'); - expect(html).toContain('text-align: left'); - expect(html).toContain('text-align: right'); - expect(html).toContain('padding-left: 0px'); - expect(html).toContain('padding-left: 16px'); - expect(html).toContain('max-width: 300px'); + expect(html).toContain('style="opacity:0.8;"'); + expect(html).toContain('text-align:left'); + expect(html).toContain('text-align:right'); + expect(html).toContain('padding-left:0px'); + expect(html).toContain('padding-left:16px'); + expect(html).toContain('max-width:300px'); }); diff --git a/superset-frontend/plugins/legacy-plugin-chart-partition/src/Partition.ts b/superset-frontend/plugins/legacy-plugin-chart-partition/src/Partition.ts index db5d47726a88..cd57ecc22e88 100644 --- a/superset-frontend/plugins/legacy-plugin-chart-partition/src/Partition.ts +++ b/superset-frontend/plugins/legacy-plugin-chart-partition/src/Partition.ts @@ -27,6 +27,7 @@ import { getNumberFormatter, getTimeFormatter, CategoricalColorNamespace, + sanitizeHtml, } from '@superset-ui/core'; interface PartitionDataNode { @@ -345,7 +346,7 @@ function Icicle(element: HTMLElement, props: IcicleProps): void { t += ''; const [tipX, tipY] = d3.mouse(element); tip - .html(t) + .html(sanitizeHtml(t)) .style('left', `${tipX + 15}px`) .style('top', `${tipY}px`); } diff --git a/superset-frontend/plugins/legacy-plugin-chart-rose/src/Rose.ts b/superset-frontend/plugins/legacy-plugin-chart-rose/src/Rose.ts index 32a3242bbc38..974d58a29725 100644 --- a/superset-frontend/plugins/legacy-plugin-chart-rose/src/Rose.ts +++ b/superset-frontend/plugins/legacy-plugin-chart-rose/src/Rose.ts @@ -27,6 +27,7 @@ import { getTimeFormatter, getNumberFormatter, CategoricalColorNamespace, + sanitizeHtml, } from '@superset-ui/core'; interface RoseDataEntry { @@ -146,24 +147,32 @@ function Rose(element: HTMLElement, props: RoseProps): void { function legendData(adatum: RoseData) { return adatum[times[0]].map((v: RoseDataEntry, i: number) => ({ disabled: state.disabled[i], + // Keep the raw name as `key` so it matches the value used for arc + // fills (colorFn is called with d.name on arcs and d.key on the + // legend). nvd3-fork's legend renders `key` via .text(), so the + // raw value is escaped at the DOM sink. key: v.name, })); } function tooltipData(d: ArcDatum, i: number, adatum: RoseData) { const timeIndex = Math.floor(d.arcId / numGroups); + // nvd3-fork's nv.models.tooltip renders the `key` strings via .html(), + // so any HTML in user-controlled column values would execute. Pass the + // keys through sanitizeHtml to strip dangerous markup while preserving + // legitimate text content. const series = useRichTooltip ? adatum[times[timeIndex]] .filter(v => !state.disabled[v.id % numGroups]) .map(v => ({ - key: v.name, + key: sanitizeHtml(v.name), value: v.value, color: colorFn(v.name, sliceId), highlight: v.id === d.arcId, })) : [ { - key: d.name, + key: sanitizeHtml(d.name), value: d.val, color: colorFn(d.name, sliceId), }, diff --git a/superset-frontend/plugins/legacy-preset-chart-nvd3/src/utils.ts b/superset-frontend/plugins/legacy-preset-chart-nvd3/src/utils.ts index 9fdeeeafa614..4474d50672e3 100644 --- a/superset-frontend/plugins/legacy-preset-chart-nvd3/src/utils.ts +++ b/superset-frontend/plugins/legacy-preset-chart-nvd3/src/utils.ts @@ -152,7 +152,7 @@ export function generateMultiLineTooltipContent(d, xFormatter, yFormatters) { d.series.forEach((series, i) => { const yFormatter = yFormatters[i]; - const key = getFormattedKey(series.key, false); + const key = getFormattedKey(series.key, true); tooltip += "" + `
` + @@ -162,7 +162,7 @@ export function generateMultiLineTooltipContent(d, xFormatter, yFormatters) { tooltip += ''; - return tooltip; + return dompurify.sanitize(tooltip); } export function generateTimePivotTooltip(d, xFormatter, yFormatter) { @@ -223,7 +223,7 @@ export function generateBubbleTooltipContent({ s += createHTMLRow(getLabel(sizeField), sizeFormatter(point.size)); s += ''; - return s; + return dompurify.sanitize(s); } // shouldRemove indicates whether the nvtooltips should be removed from the DOM @@ -262,11 +262,16 @@ export function wrapTooltip(chart) { : chart; const tooltipGeneratorFunc = tooltipLayer.tooltip.contentGenerator(); tooltipLayer.tooltip.contentGenerator(d => { - let tooltip = `
`; - tooltip += tooltipGeneratorFunc(d); - tooltip += '
'; - - return tooltip; + // The nvd3-fork default contentGenerator builds tooltip HTML with + // unescaped series keys (and feeds them into the tooltip's `.html()` + // sink at render time). Run the final string through DOMPurify so + // charts that do NOT install a custom contentGenerator (Line, Bar, + // Area, Pie, BoxPlot, etc.) cannot execute stored payloads in + // column or series names. Custom contentGenerators set elsewhere in + // this module already return sanitized output, making this a + // belt-and-braces wrap. + const tooltip = `
${tooltipGeneratorFunc(d)}
`; + return dompurify.sanitize(tooltip); }); } @@ -279,17 +284,19 @@ export function tipFactory(layer) { if (!d) { return ''; } - const title = + const rawTitle = d[layer.titleColumn] && d[layer.titleColumn].length > 0 ? `${d[layer.titleColumn]} - ${layer.name}` : layer.name; - const body = Array.isArray(layer.descriptionColumns) + const rawBody = Array.isArray(layer.descriptionColumns) ? layer.descriptionColumns.map(c => d[c]) : Object.values(d); - return `
${title}

${body.join( - ', ', - )}
`; + return dompurify.sanitize( + `
${rawTitle}

${rawBody.join( + ', ', + )}
`, + ); }); } diff --git a/superset-frontend/plugins/legacy-preset-chart-nvd3/test/utils.test.ts b/superset-frontend/plugins/legacy-preset-chart-nvd3/test/utils.test.ts index a883d4c6b7f4..a20fa38411c5 100644 --- a/superset-frontend/plugins/legacy-preset-chart-nvd3/test/utils.test.ts +++ b/superset-frontend/plugins/legacy-preset-chart-nvd3/test/utils.test.ts @@ -24,8 +24,11 @@ import { import { computeYDomain, + generateBubbleTooltipContent, + generateMultiLineTooltipContent, getTimeOrNumberFormatter, formatLabel, + tipFactory, } from '../src/utils'; const DATA = [ @@ -181,4 +184,96 @@ describe('nvd3/utils', () => { ]); }); }); + + // ------------------------------------------------------------------ + // Tooltip HTML sanitisation (XSS regression). + // Each helper below feeds user-controlled column values into a + // d3 / nvd3 .html() sink; the sanitised return must strip dangerous + // markup so a stored payload cannot execute on hover. + // ------------------------------------------------------------------ + + describe('generateBubbleTooltipContent() sanitises user input', () => { + test('strips ', + x: 1, + y: 2, + size: 3, + }, + entity: 'entity', + xField: 'x', + yField: 'y', + sizeField: 'size', + xFormatter: (v: number) => String(v), + yFormatter: (v: number) => String(v), + sizeFormatter: (v: number) => String(v), + }); + expect(html).not.toMatch(/', + color: 'red', + value: 1, + }, + ], + }, + (v: number) => String(v), + [(v: number) => String(v)], + ); + expect(html).not.toMatch(/payload', + }; + const html = tip.html()(datum); + expect(html).not.toMatch(/' + + link = str(dash.dashboard_link()) + + # The injected script tag / attribute breakout must be escaped away. + assert ""', + header_data={ + "notification_format": "PNG", + "notification_type": "Alert", + "owners": [1], + "notification_source": None, + "chart_id": None, + "dashboard_id": None, + "slack_channels": None, + "execution_id": "test-execution-id", + }, + ) + email_body = ( + EmailNotification( + recipient=ReportRecipients(type=ReportRecipientType.EMAIL), content=content + ) + ._get_content() + .body + ) + + # Injected markup in the error text must be stripped, not rendered. + assert "" not in email_body + assert "onerror=alert(1)" not in email_body + + @with_feature_flags(DATE_FORMAT_IN_EMAIL_SUBJECT=True) def test_email_subject_with_datetime() -> None: # `superset.models.helpers`, a dependency of following imports, diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py index 0b83dd71dbd2..27c293229705 100644 --- a/tests/unit_tests/sql/parse_tests.py +++ b/tests/unit_tests/sql/parse_tests.py @@ -1166,6 +1166,30 @@ def test_has_mutation(engine: str, sql: str, expected: bool) -> None: assert SQLScript(sql, engine).has_mutation() == expected +@pytest.mark.parametrize( + "engine, sql, expected", + [ + # Plain SELECT parses to a proper AST node. + ("postgresql", "SELECT * FROM foo", False), + # CALL parses to ``exp.Command`` on Postgres. + ("postgresql", "CALL my_proc(1);", True), + # A script that mixes a parseable statement with an unparseable one + # is still flagged so strict scoping can refuse the whole script. + ("postgresql", "SELECT 1; CALL my_proc();", True), + # Non-sqlglot engines (e.g. Kusto KQL) do not produce a parseable + # AST and cannot have their tables enumerated, so they must be + # flagged as unparseable to fail closed under strict scoping. + ("kustokql", "print 1", True), + ], +) +def test_has_unparseable_statement(engine: str, sql: str, expected: bool) -> None: + """ + Test the `has_unparseable_statement` property used by strict scoping to + refuse statements that sqlglot couldn't fully model. + """ + assert SQLScript(sql, engine).has_unparseable_statement is expected + + @pytest.mark.parametrize( "sql", [