diff --git a/docs/pages/product/apis-integrations/recipes/cast-numerics.mdx b/docs/pages/product/apis-integrations/recipes/cast-numerics.mdx index 82da2d71c2c6e..2a1048a9fdc0e 100644 --- a/docs/pages/product/apis-integrations/recipes/cast-numerics.mdx +++ b/docs/pages/product/apis-integrations/recipes/cast-numerics.mdx @@ -16,7 +16,7 @@ numeric measure: ```yaml cubes: - name: cube_with_big_numbers - sql: > + sql: | SELECT 123::BIGINT AS number UNION ALL SELECT 9007199254740991::BIGINT AS number UNION ALL SELECT 9999999999999999::BIGINT AS number diff --git a/docs/pages/product/apis-integrations/recipes/sorting.mdx b/docs/pages/product/apis-integrations/recipes/sorting.mdx index 5d0bf0fe955a1..763f099700419 100644 --- a/docs/pages/product/apis-integrations/recipes/sorting.mdx +++ b/docs/pages/product/apis-integrations/recipes/sorting.mdx @@ -29,7 +29,7 @@ Consider the following data model: ```yaml cubes: - name: sort_nulls - sql: > + sql: | SELECT 1234 AS value UNION ALL SELECT 5678 AS value UNION ALL SELECT NULL AS value diff --git a/docs/pages/product/caching/recipes/incrementally-building-pre-aggregations-for-a-date-range.mdx b/docs/pages/product/caching/recipes/incrementally-building-pre-aggregations-for-a-date-range.mdx index f4be2eb3754c6..cc5b643b9a799 100644 --- a/docs/pages/product/caching/recipes/incrementally-building-pre-aggregations-for-a-date-range.mdx +++ b/docs/pages/product/caching/recipes/incrementally-building-pre-aggregations-for-a-date-range.mdx @@ -21,7 +21,7 @@ Let's use an example of a cube with a nested SQL query: ```yaml cubes: - name: users_with_organizations - sql: > + sql: | WITH users AS ( SELECT md5(company) AS organization_id, @@ -168,7 +168,7 @@ properties][ref-schema-ref-preagg-buildrange]: ```yaml cubes: - name: users_with_organizations - sql: > + sql: | WITH users AS ( SELECT md5(company) AS organization_id, diff --git a/docs/pages/product/caching/recipes/refreshing-select-partitions.mdx 
b/docs/pages/product/caching/recipes/refreshing-select-partitions.mdx index 87ec47f6c16be..a29dc9882580f 100644 --- a/docs/pages/product/caching/recipes/refreshing-select-partitions.mdx +++ b/docs/pages/product/caching/recipes/refreshing-select-partitions.mdx @@ -125,7 +125,7 @@ cubes: granularity: day partition_granularity: month # this is where we specify the partition refreshKey: - sql: > + sql: | SELECT max(updated_at) FROM public.orders WHERE {FILTER_PARAMS.orders.created_at.filter('created_at')} ``` diff --git a/docs/pages/product/configuration/recipes/custom-data-model-per-tenant.mdx b/docs/pages/product/configuration/recipes/custom-data-model-per-tenant.mdx index ef3ec60cfc4e0..0f6be867599d6 100644 --- a/docs/pages/product/configuration/recipes/custom-data-model-per-tenant.mdx +++ b/docs/pages/product/configuration/recipes/custom-data-model-per-tenant.mdx @@ -394,7 +394,7 @@ This is the `products` cube for the `avocado` tenant: ```yaml cubes: - name: products - sql: > + sql: | SELECT * FROM public.Products WHERE MOD (id, 2) = 1 ``` @@ -417,7 +417,7 @@ This is the `products` cube for the `mango` tenant: ```yaml cubes: - name: products - sql: > + sql: | SELECT * FROM public.Products WHERE MOD (id, 2) = 0 ``` diff --git a/docs/pages/product/configuration/reference/config.mdx b/docs/pages/product/configuration/reference/config.mdx index 12a03eead0ff9..0539f31f809db 100644 --- a/docs/pages/product/configuration/reference/config.mdx +++ b/docs/pages/product/configuration/reference/config.mdx @@ -1082,7 +1082,7 @@ You can use the custom value from extend context in your data model like this: cubes: - name: users - sql: > + sql: | SELECT * FROM users WHERE organization_id={{ securityContext['active_organization'] }} diff --git a/docs/pages/product/configuration/reference/environment-variables.mdx b/docs/pages/product/configuration/reference/environment-variables.mdx index ddb9ef4207a6b..de5d8e517a1a4 100644 --- 
a/docs/pages/product/configuration/reference/environment-variables.mdx +++ b/docs/pages/product/configuration/reference/environment-variables.mdx @@ -1377,6 +1377,16 @@ See [this issue](https://github.com/cube-js/cube/issues/9285) for details. +## `CUBEJS_YAML_FOLDED_STRINGS_AS_LITERAL` + +If `true`, enables interpreting [folded strings][ref-yaml-folded-and-literal] in +YAML-based data model files as literal strings. This affects how line breaks and spaces +are handled in multi-line strings. + +| Possible Values | Default in Development | Default in Production | +| --------------- | ---------------------- | --------------------- | +| `true`, `false` | `true` | `true` | + ## `CUBEJS_WEB_SOCKETS` If `true`, then use WebSocket for data fetching. @@ -1799,4 +1809,5 @@ The port for a Cube deployment to listen to API connections on. [ref-multi-stage-calculations]: /product/data-modeling/concepts/multi-stage-calculations [ref-folders]: /product/data-modeling/reference/view#folders [ref-dataviz-tools]: /product/configuration/visualization-tools -[ref-context-to-app-id]: /product/configuration/reference/config#context_to_app_id \ No newline at end of file +[ref-context-to-app-id]: /product/configuration/reference/config#context_to_app_id +[ref-yaml-folded-and-literal]: /product/data-modeling/dynamic/jinja#folded-and-literal-strings \ No newline at end of file diff --git a/docs/pages/product/data-modeling/concepts.mdx b/docs/pages/product/data-modeling/concepts.mdx index da050c99e636a..d175a3d0562a4 100644 --- a/docs/pages/product/data-modeling/concepts.mdx +++ b/docs/pages/product/data-modeling/concepts.mdx @@ -85,7 +85,7 @@ cube(`orders`, { ```yaml cubes: - name: orders - sql: > + sql: | SELECT * FROM orders, line_items WHERE orders.id = line_items.order_id @@ -564,7 +564,7 @@ Consider the following cube: ```yaml cubes: - name: employees - sql: > + sql: | SELECT 1 AS id, 'Ali' AS first_name, 20 AS age, 'Los Gatos' AS city UNION ALL SELECT 2 AS id, 'Bob' AS first_name, 30 
AS age, 'San Diego' AS city UNION ALL SELECT 3 AS id, 'Eve' AS first_name, 40 AS age, 'San Diego' AS city diff --git a/docs/pages/product/data-modeling/concepts/calculated-members.mdx b/docs/pages/product/data-modeling/concepts/calculated-members.mdx index 0b833ad2e09e7..e5faefb5b8c4e 100644 --- a/docs/pages/product/data-modeling/concepts/calculated-members.mdx +++ b/docs/pages/product/data-modeling/concepts/calculated-members.mdx @@ -27,7 +27,7 @@ integer value. ```yaml cubes: - name: orders - sql: > + sql: | SELECT 1 AS id, 'processing' AS status UNION ALL SELECT 2 AS id, 'completed' AS status UNION ALL SELECT 3 AS id, 'completed' AS status @@ -177,7 +177,7 @@ between `orders` and `users`: ```yaml cubes: - name: orders - sql: > + sql: | SELECT 1 AS id, 1 AS user_id UNION ALL SELECT 2 AS id, 1 AS user_id UNION ALL SELECT 3 AS id, 2 AS user_id @@ -202,7 +202,7 @@ cubes: relationship: one_to_many - name: users - sql: > + sql: | SELECT 1 AS id, 'Alice' AS name UNION ALL SELECT 2 AS id, 'Bob' AS name @@ -308,7 +308,7 @@ granularity. 
It can be one of the [default granularities][ref-default-granularit ```yaml cubes: - name: users - sql: > + sql: | SELECT '2025-01-01T00:00:00Z' AS created_at UNION ALL SELECT '2025-02-01T00:00:00Z' AS created_at UNION ALL SELECT '2025-03-01T00:00:00Z' AS created_at @@ -416,7 +416,7 @@ relationship between `orders` and `users`: ```yaml cubes: - name: orders - sql: > + sql: | SELECT 1 AS id, 1 AS user_id UNION ALL SELECT 2 AS id, 1 AS user_id UNION ALL SELECT 3 AS id, 2 AS user_id @@ -437,7 +437,7 @@ cubes: relationship: one_to_many - name: users - sql: > + sql: | SELECT 1 AS id, 'Alice' AS name UNION ALL SELECT 2 AS id, 'Bob' AS name diff --git a/docs/pages/product/data-modeling/concepts/code-reusability-extending-cubes.mdx b/docs/pages/product/data-modeling/concepts/code-reusability-extending-cubes.mdx index 36bdb210872e8..bd892d7a83632 100644 --- a/docs/pages/product/data-modeling/concepts/code-reusability-extending-cubes.mdx +++ b/docs/pages/product/data-modeling/concepts/code-reusability-extending-cubes.mdx @@ -141,7 +141,7 @@ First, the `sql` parameter can be overridden in each child cube: ```yaml cubes: - name: product_purchases - sql: > + sql: | SELECT * FROM events WHERE {FILTER_PARAMS.product_purchases.timestamp.filter('time')} @@ -172,7 +172,7 @@ in the SQL query: ```yaml cubes: - name: base_events - sql: > + sql: | SELECT * FROM events WHERE diff --git a/docs/pages/product/data-modeling/concepts/data-blending.mdx b/docs/pages/product/data-modeling/concepts/data-blending.mdx index 5eacc15310b7a..3a64f592d7934 100644 --- a/docs/pages/product/data-modeling/concepts/data-blending.mdx +++ b/docs/pages/product/data-modeling/concepts/data-blending.mdx @@ -191,7 +191,7 @@ cube(`all_sales`, { ```yaml cubes: - name: all_sales - sql: > + sql: | SELECT amount, user_id AS customer_id, @@ -228,7 +228,7 @@ cubes: - sql: "{CUBE}.row_type = 'retail'" - name: online_revenue_percentage - sql: > + sql: | {online_revenue} / NULLIF({online_revenue} + {offline_revenue}, 0) 
type: number diff --git a/docs/pages/product/data-modeling/concepts/multi-stage-calculations.mdx b/docs/pages/product/data-modeling/concepts/multi-stage-calculations.mdx index 9950969194adb..e0e179e046ebc 100644 --- a/docs/pages/product/data-modeling/concepts/multi-stage-calculations.mdx +++ b/docs/pages/product/data-modeling/concepts/multi-stage-calculations.mdx @@ -67,7 +67,7 @@ Data model: cubes: - name: orders - sql: > + sql: | SELECT 1 AS id, '2025-01-01'::TIMESTAMP AS time UNION ALL SELECT 2 AS id, '2025-01-11'::TIMESTAMP AS time UNION ALL SELECT 3 AS id, '2025-01-21'::TIMESTAMP AS time UNION ALL @@ -136,7 +136,7 @@ Data model: ```yaml cubes: - name: period_to_date - sql: > + sql: | SELECT '2024-01-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL SELECT '2024-02-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL SELECT '2024-03-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL @@ -220,7 +220,7 @@ Data model: ```yaml cubes: - name: prior_date - sql: > + sql: | SELECT '2023-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL SELECT '2023-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL SELECT '2023-06-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL @@ -350,7 +350,7 @@ Data model: ```yaml cubes: - name: percent_of_total - sql: > + sql: | SELECT 1 AS id, 1000 AS revenue, 'A' AS product, 'USA' AS country UNION ALL SELECT 2 AS id, 2000 AS revenue, 'B' AS product, 'USA' AS country UNION ALL SELECT 3 AS id, 3000 AS revenue, 'A' AS product, 'Austria' AS country UNION ALL @@ -417,7 +417,7 @@ Data model: ```yaml cubes: - name: ranking - sql: > + sql: | SELECT 1 AS id, 1000 AS revenue, 'A' AS product, 'USA' AS country UNION ALL SELECT 2 AS id, 2000 AS revenue, 'B' AS product, 'USA' AS country UNION ALL SELECT 3 AS id, 3000 AS revenue, 'A' AS product, 'Austria' AS country UNION ALL diff --git a/docs/pages/product/data-modeling/concepts/polymorphic-cubes.mdx b/docs/pages/product/data-modeling/concepts/polymorphic-cubes.mdx index 55c821db9659d..e530251fb57ff 100644 
--- a/docs/pages/product/data-modeling/concepts/polymorphic-cubes.mdx +++ b/docs/pages/product/data-modeling/concepts/polymorphic-cubes.mdx @@ -109,14 +109,14 @@ cube(`students`, { cubes: - name: teachers extends: users - sql: > + sql: | SELECT * FROM {users.sql()} WHERE type = 'teacher' - name: students extends: users - sql: > + sql: | SELECT * FROM {users.sql()} WHERE type = 'student' diff --git a/docs/pages/product/data-modeling/concepts/working-with-joins.mdx b/docs/pages/product/data-modeling/concepts/working-with-joins.mdx index 367cdb7128448..d430867310242 100644 --- a/docs/pages/product/data-modeling/concepts/working-with-joins.mdx +++ b/docs/pages/product/data-modeling/concepts/working-with-joins.mdx @@ -463,7 +463,7 @@ cube(`campaigns`, { ```yaml cubes: - name: campaigns - sql: > + sql: | SELECT campaign_id, campaign_name, @@ -571,7 +571,7 @@ cubes: joins: - name: campaigns relationship: many_to_one - sql: > + sql: | {CUBE}.campaign_id = {campaigns.id} AND {CUBE}.customer_name = {campaigns.customer_name} @@ -598,7 +598,7 @@ cubes: joins: - name: transactions relationship: one_to_many - sql: > + sql: | {CUBE}.customer_name = {transactions.customer_name} AND {CUBE}.campaign_id = {transactions.campaign_id} @@ -638,7 +638,7 @@ and `right`: ```yaml cubes: - name: left - sql: > + sql: | SELECT 1 AS id, 11 AS value UNION ALL SELECT 2 AS id, 12 AS value UNION ALL SELECT 3 AS id, 13 AS value @@ -659,7 +659,7 @@ cubes: relationship: one_to_one - name: right - sql: > + sql: | SELECT 1 AS id, 101 AS value UNION ALL SELECT 2 AS id, 102 AS value UNION ALL SELECT 3 AS id, 103 AS value @@ -874,7 +874,7 @@ cube(`customers`, { ```yaml cubes: - name: orders - sql: > + sql: | SELECT 1 AS id, 1001 AS customer_id, 123 AS revenue UNION ALL SELECT 2 AS id, 1001 AS customer_id, 234 AS revenue UNION ALL SELECT 3 AS id, 1002 AS customer_id, 345 AS revenue UNION ALL @@ -904,7 +904,7 @@ cubes: relationship: many_to_one - name: customers - sql: > + sql: | SELECT 1001 AS id, 'Alice' AS 
name UNION ALL SELECT 1002 AS id, 'Bob' AS name UNION ALL SELECT 1003 AS id, 'Eve' AS name @@ -1008,7 +1008,7 @@ and `c`, and both `b` and `c` join to `d`: ```yaml cubes: - name: a - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -1037,7 +1037,7 @@ cubes: relationship: one_to_one - name: b - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -1054,7 +1054,7 @@ cubes: relationship: one_to_one - name: c - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -1071,7 +1071,7 @@ cubes: relationship: one_to_one - name: d - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -1361,7 +1361,7 @@ also use join paths, as shown in the following example: ```yaml cubes: - name: a - sql: > + sql: | SELECT 1 AS id, 1 AS b_id, 1 AS c_id UNION ALL SELECT 2 AS id, 2 AS b_id, 2 AS c_id @@ -1398,7 +1398,7 @@ cubes: - a.b.c.measure2 - name: b - sql: > + sql: | SELECT 1 AS id, 1 AS c_id UNION ALL SELECT 2 AS id, 2 AS c_id @@ -1414,7 +1414,7 @@ cubes: sql: "{CUBE}.c_id = {c.id}" - name: c - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id @@ -1779,7 +1779,7 @@ cube(`companies_to_users`, { ```yaml cubes: - name: users - sql: > + sql: | SELECT 1 AS id, 'Ali' AS name UNION ALL SELECT 2 AS id, 'Bob' AS name UNION ALL SELECT 3 AS id, 'Eve' AS name @@ -1795,7 +1795,7 @@ cubes: primary_key: true - name: companies - sql: > + sql: | SELECT 11 AS id, 'Acme Corporation' AS name UNION ALL SELECT 12 AS id, 'Stark Industries' AS name @@ -1810,7 +1810,7 @@ cubes: type: string - name: companies_to_users - sql: > + sql: | SELECT 11 AS company_id, 1 AS user_id UNION ALL SELECT 11 AS company_id, 2 AS user_id UNION ALL SELECT 12 AS company_id, 3 AS user_id diff --git a/docs/pages/product/data-modeling/dynamic/_meta.js b/docs/pages/product/data-modeling/dynamic/_meta.js index 3586ee91e1753..e507fa41df9fd 100644 --- a/docs/pages/product/data-modeling/dynamic/_meta.js +++ 
b/docs/pages/product/data-modeling/dynamic/_meta.js @@ -1,6 +1,6 @@ module.exports = { - "jinja": "Dynamic data models with Jinja and Python", - "javascript": "Dynamic data models with JavaScript", + "jinja": "YAML, Jinja, and Python", + "javascript": "JavaScript", "code-reusability-export-and-import": "Export and import", - "schema-execution-environment": "Execution environment (JavaScript models)" + "schema-execution-environment": "Execution environment (JavaScript)" } \ No newline at end of file diff --git a/docs/pages/product/data-modeling/dynamic/javascript.mdx b/docs/pages/product/data-modeling/dynamic/javascript.mdx index 56ec5dca78a43..01cadb49580dc 100644 --- a/docs/pages/product/data-modeling/dynamic/javascript.mdx +++ b/docs/pages/product/data-modeling/dynamic/javascript.mdx @@ -1,4 +1,4 @@ -# Dynamic data models with JavaScript +# Data modeling with JavaScript diff --git a/docs/pages/product/data-modeling/dynamic/jinja.mdx b/docs/pages/product/data-modeling/dynamic/jinja.mdx index 048e873a0d96b..d797a766081bc 100644 --- a/docs/pages/product/data-modeling/dynamic/jinja.mdx +++ b/docs/pages/product/data-modeling/dynamic/jinja.mdx @@ -1,4 +1,4 @@ -# Dynamic data models with Jinja and Python +# Data modeling with YAML, Jinja, and Python Cube supports authoring dynamic data models using the [Jinja templating language][jinja] and Python. This allows de-duplicating common patterns in your data models @@ -6,23 +6,54 @@ as well as dynamically generating data models from a remote data source. Jinja is supported in all YAML data model files. +## YAML + +It is recommended to default to YAML syntax because of its simplicity and readability. + +### Folded and literal strings + +Sometimes you might want to use multi-line strings in YAML-based data models, e.g., +in parameters such as `sql` or `description`. It is recommended to use [literal][ref-yaml-literal] +(`|`) string style in such cases as it preserves line breaks. 
+ +```yaml +cubes: + - name: orders + description: | + This cube represents customer orders. + It includes measures for total sales and order count. + sql: | + -- Fetch only relevant columns + SELECT id, created_at, total_amount + FROM staging.orders +``` + +When [folded][ref-yaml-folded] (`>`) style is used, Cube will treat it as literal, +preserving line breaks anyway. This prevents potential issues, e.g., with SQL queries +containing comments that might break if line breaks are removed. You can control this +behavior via the `CUBEJS_YAML_FOLDED_STRINGS_AS_LITERAL` environment variable. + ## Jinja Please check the [Jinja documentation][jinja-docs] for details on Jinja syntax. +### Previewing YAML + +You can preview the data model code after applying Jinja templates in the [Data +Model][ref-data-model-editor] editor by clicking ... → Jinja Preview +on files that contain Jinja templates in the sidebar. + Currently, there's no way to preview the data model code in YAML after applying -Jinja templates. -Please [track this issue](https://github.com/cube-js/cube/issues/8134). - -As a workaround, you can view the resulting data model in -[Playground](/product/workspace/playground) and [Visual Model][ref-visual-model]. -You can also introspect the data model using the [`/v1/meta` REST API -endpoint](/product/apis-integrations/rest-api/reference#base_pathv1meta). +Jinja templates in Cube Core. Please [track this issue](https://github.com/cube-js/cube/issues/8134). +You can also view the resulting data model in [Playground][ref-playground] and [Visual +Model][ref-visual-model]. Also, you can introspect the data model using the +[`/v1/meta` REST API endpoint][ref-meta-api]. + ### Loops Jinja supports [looping][jinja-docs-for-loop] over lists and dictionaries. 
In @@ -40,7 +71,7 @@ the following example, we loop over a list of nested properties and generate a cubes: - name: analytics - sql: > + sql: | SELECT {%- for prop in nested_properties %} {{ prop }}_prop.value AS {{ prop }} @@ -118,7 +149,7 @@ property: cubes: - name: payments - sql: > + sql: | SELECT id AS payment_id, {{ cents_to_dollars('amount') }} AS amount_usd @@ -299,4 +330,9 @@ image][ref-docker-image-extension]. [ref-cube-package]: /product/data-modeling/reference/cube-package [ref-cube-template-context]: /product/data-modeling/reference/cube-package#templatecontext-class [ref-cube-dbt-package]: /product/data-modeling/reference/cube_dbt -[ref-cube-with-dbt]: /product/data-modeling/recipes/dbt \ No newline at end of file +[ref-cube-with-dbt]: /product/data-modeling/recipes/dbt +[ref-data-model-editor]: /product/workspace/data-model +[ref-playground]: /product/workspace/playground +[ref-meta-api]: /product/apis-integrations/rest-api/reference#base_pathv1meta +[ref-yaml-literal]: https://yaml.org/spec/1.2.2/#812-literal-style +[ref-yaml-folded]: https://yaml.org/spec/1.2.2/#813-folded-style \ No newline at end of file diff --git a/docs/pages/product/data-modeling/recipes/active-users.mdx b/docs/pages/product/data-modeling/recipes/active-users.mdx index 05d9bff652586..d45966f2a1706 100644 --- a/docs/pages/product/data-modeling/recipes/active-users.mdx +++ b/docs/pages/product/data-modeling/recipes/active-users.mdx @@ -23,7 +23,7 @@ measure parameter. ```yaml cubes: - name: active_users - sql: > + sql: | SELECT user_id, created_at FROM public.orders measures: diff --git a/docs/pages/product/data-modeling/recipes/cohort-retention.mdx b/docs/pages/product/data-modeling/recipes/cohort-retention.mdx index f1369cbf8c098..d298ba8ad9fe6 100644 --- a/docs/pages/product/data-modeling/recipes/cohort-retention.mdx +++ b/docs/pages/product/data-modeling/recipes/cohort-retention.mdx @@ -47,7 +47,7 @@ the months' list. 
```yaml cubes: - name: monthly_retention - sql: > + sql: | SELECT users.id as user_id, date_trunc('month', users.created_at) as signup_month, diff --git a/docs/pages/product/data-modeling/recipes/custom-calendar.mdx b/docs/pages/product/data-modeling/recipes/custom-calendar.mdx index cb456603cf6ab..4cc24a18ce139 100644 --- a/docs/pages/product/data-modeling/recipes/custom-calendar.mdx +++ b/docs/pages/product/data-modeling/recipes/custom-calendar.mdx @@ -40,7 +40,7 @@ materialized as a table using a data transformation tool: cubes: - name: calendar_454 public: false - sql: > + sql: | WITH RECURSIVE fiscal_weeks AS ( -- Step 1: Define the start of the fiscal years (Sunday closest to Feb 1st) SELECT diff --git a/docs/pages/product/data-modeling/recipes/custom-granularity.mdx b/docs/pages/product/data-modeling/recipes/custom-granularity.mdx index 3245c6cb4051c..61691d84a8596 100644 --- a/docs/pages/product/data-modeling/recipes/custom-granularity.mdx +++ b/docs/pages/product/data-modeling/recipes/custom-granularity.mdx @@ -29,7 +29,7 @@ We can also use them in further calculations like rendering `fiscal_quarter_labe ```yaml cubes: - name: custom_granularities - sql: > + sql: | SELECT '2024-01-15'::TIMESTAMP AS timestamp UNION ALL SELECT '2024-02-15'::TIMESTAMP AS timestamp UNION ALL SELECT '2024-03-15'::TIMESTAMP AS timestamp UNION ALL @@ -76,7 +76,7 @@ cubes: type: time - name: fiscal_quarter_label - sql: > + sql: | 'FY' || (EXTRACT(YEAR FROM {timestamp.fiscal_year}) + 1) || '-Q' || EXTRACT(QUARTER FROM {timestamp.fiscal_quarter} + INTERVAL '3 MONTHS') type: string diff --git a/docs/pages/product/data-modeling/recipes/dynamic-union-tables.mdx b/docs/pages/product/data-modeling/recipes/dynamic-union-tables.mdx index 3bd6b31841336..1666e87332075 100644 --- a/docs/pages/product/data-modeling/recipes/dynamic-union-tables.mdx +++ b/docs/pages/product/data-modeling/recipes/dynamic-union-tables.mdx @@ -21,7 +21,7 @@ operators: ```yaml cubes: - name: customers - sql: > + 
sql: | SELECT *, 'Einstein' AS name FROM einstein_data UNION ALL SELECT *, 'Pascal' AS name FROM pascal_data UNION ALL SELECT *, 'Newton' AS name FROM newton_data @@ -81,7 +81,7 @@ generate necessary SQL based on a list of tables: cubes: - name: customers - sql: > + sql: | {%- for table, name in customer_tables | items %} SELECT *, '{{ name | safe }}' AS name FROM {{ table | safe }} {% if not loop.last %}UNION ALL{% endif %} diff --git a/docs/pages/product/data-modeling/recipes/event-analytics.mdx b/docs/pages/product/data-modeling/recipes/event-analytics.mdx index 234fc276f19e1..e047accf8136d 100644 --- a/docs/pages/product/data-modeling/recipes/event-analytics.mdx +++ b/docs/pages/product/data-modeling/recipes/event-analytics.mdx @@ -81,7 +81,7 @@ cube(`events`, { ```yaml cubes: - name: events - sql: > + sql: | SELECT t.id || '-e' as event_id , t.anonymous_id as anonymous_id @@ -264,7 +264,7 @@ cube(`sessions`, { ```yaml cubes: - name: sessions - sql: > + sql: | SELECT ROW_NUMBER() OVER(PARTITION BY event.anonymous_id ORDER BY event.timestamp) || ' - '|| event.anonymous_id AS session_id , event.anonymous_id @@ -398,7 +398,7 @@ cubes: joins: - name: sessions relationship: many_to_one - sql: > + sql: | {events.anonymous_id} = {sessions.anonymous_id} AND {events.timestamp} >= {sessions.start_at} AND ({events.timestamp} < {sessions.next_start_at} or {sessions.next_start_at} is null) @@ -480,7 +480,7 @@ cubes: public: false - name: end_at - sql: > + sql: | CASE WHEN {end_raw} + INTERVAL '1 minutes' > {CUBE}.next_session_start_at THEN {CUBE}.next_session_start_at ELSE {end_raw} + INTERVAL '30 minutes' diff --git a/docs/pages/product/data-modeling/recipes/filtered-aggregates.mdx b/docs/pages/product/data-modeling/recipes/filtered-aggregates.mdx index c71c7f77ca861..d2dcea3355b2f 100644 --- a/docs/pages/product/data-modeling/recipes/filtered-aggregates.mdx +++ b/docs/pages/product/data-modeling/recipes/filtered-aggregates.mdx @@ -20,7 +20,7 @@ and the `store` cube has a 
one-to-many relationship with the `sales` cube: ```yml cubes: - name: retailer - sql: > + sql: | SELECT 101 AS id, 'Retailer 1' AS name, 10 AS sales_goal, '2025-02-01Z'::TIMESTAMP AS goal_start UNION ALL SELECT 102 AS id, 'Retailer 2' AS name, 10 AS sales_goal, '2025-02-01Z'::TIMESTAMP AS goal_start UNION ALL SELECT 103 AS id, 'Retailer 3' AS name, 10 AS sales_goal, '2025-02-01Z'::TIMESTAMP AS goal_start @@ -64,7 +64,7 @@ cubes: sql: "({CUBE.sales_for_goal} / NULLIF({CUBE.sales_goal}, 0))" - name: store - sql: > + sql: | SELECT 201 AS id, 'Store 1' AS name, 101 AS retailer_id UNION ALL SELECT 202 AS id, 'Store 2' AS name, 101 AS retailer_id UNION ALL SELECT 203 AS id, 'Store 3' AS name, 101 AS retailer_id UNION ALL @@ -118,7 +118,7 @@ cubes: type: sum - name: sales - sql: > + sql: | SELECT 301 AS id, 201 AS store_id, '2025-01-01Z'::TIMESTAMP AS order_date, 1 AS sales UNION ALL SELECT 302 AS id, 202 AS store_id, '2025-01-01Z'::TIMESTAMP AS order_date, 1 AS sales UNION ALL SELECT 303 AS id, 203 AS store_id, '2025-01-01Z'::TIMESTAMP AS order_date, 1 AS sales UNION ALL diff --git a/docs/pages/product/data-modeling/recipes/nested-aggregates.mdx b/docs/pages/product/data-modeling/recipes/nested-aggregates.mdx index 4c0aa7ee8a59d..0a01d9946b913 100644 --- a/docs/pages/product/data-modeling/recipes/nested-aggregates.mdx +++ b/docs/pages/product/data-modeling/recipes/nested-aggregates.mdx @@ -39,7 +39,7 @@ Consider the following data model: ```yaml cubes: - name: nested_agg_sales - sql: > + sql: | SELECT 1 AS id, 1 AS store_id, 1 AS product_id, 10 AS sales UNION ALL SELECT 2 AS id, 1 AS store_id, 1 AS product_id, 20 AS sales UNION ALL SELECT 3 AS id, 1 AS store_id, 2 AS product_id, 30 AS sales UNION ALL @@ -73,7 +73,7 @@ cubes: type: sum - name: nested_agg_stores_orders - sql: > + sql: | SELECT store_id, product_id FROM ( SELECT 1 AS id, 1 AS store_id, 1 AS product_id, 10 AS sales UNION ALL diff --git 
a/docs/pages/product/data-modeling/recipes/passing-dynamic-parameters-in-a-query.mdx b/docs/pages/product/data-modeling/recipes/passing-dynamic-parameters-in-a-query.mdx index a6542fd37402d..67a628d018960 100644 --- a/docs/pages/product/data-modeling/recipes/passing-dynamic-parameters-in-a-query.mdx +++ b/docs/pages/product/data-modeling/recipes/passing-dynamic-parameters-in-a-query.mdx @@ -51,7 +51,7 @@ column with a single value that the user chose so that we can use it in other ca ```yaml cubes: - name: users - sql: > + sql: | WITH data AS ( SELECT users.id AS id, @@ -91,7 +91,7 @@ cubes: - name: ratio title: Ratio Women in the City to Total Number of People - sql: > + sql: | 1.0 * {number_of_people_of_any_gender_in_the_city} / {total_number_of_women} type: number @@ -173,7 +173,7 @@ The above code shows very clearly what is happening, but it is even simplier to ```yaml cubes: - name: users - sql: > + sql: | WITH city AS ( SELECT DISTINCT city AS city_filter diff --git a/docs/pages/product/data-modeling/recipes/period-over-period.mdx b/docs/pages/product/data-modeling/recipes/period-over-period.mdx index 685122a5204e4..fb807b71e0fa2 100644 --- a/docs/pages/product/data-modeling/recipes/period-over-period.mdx +++ b/docs/pages/product/data-modeling/recipes/period-over-period.mdx @@ -34,7 +34,7 @@ their values: ```yaml cubes: - name: month_over_month - sql: > + sql: | SELECT 1 AS value, '2024-01-01'::TIMESTAMP AS date UNION ALL SELECT 2 AS value, '2024-01-01'::TIMESTAMP AS date UNION ALL SELECT 3 AS value, '2024-02-01'::TIMESTAMP AS date UNION ALL diff --git a/docs/pages/product/data-modeling/recipes/snapshots.mdx b/docs/pages/product/data-modeling/recipes/snapshots.mdx index 6b0c7bc7d6c1a..9d8f036a22c07 100644 --- a/docs/pages/product/data-modeling/recipes/snapshots.mdx +++ b/docs/pages/product/data-modeling/recipes/snapshots.mdx @@ -104,7 +104,7 @@ cube(`status_snapshots`, { cubes: - name: status_snapshots extends: statuses - sql: > + sql: | -- Create a range 
from the earlist date to the latest date WITH range AS ( SELECT date diff --git a/docs/pages/product/data-modeling/recipes/style-guide.mdx b/docs/pages/product/data-modeling/recipes/style-guide.mdx index 26b2bc582edfe..74d1101589103 100644 --- a/docs/pages/product/data-modeling/recipes/style-guide.mdx +++ b/docs/pages/product/data-modeling/recipes/style-guide.mdx @@ -199,7 +199,7 @@ views: ```yaml cubes: - name: california_users - sql: > + sql: | SELECT id, first_name, @@ -233,6 +233,8 @@ cubes: - Use a new line to separate list items that are dictionaries, where appropriate. - Make sure lines are no longer than 80 characters. +- Prefer [literal style][ref-yaml-folded-and-literal] (`|`) for multi-line strings +over folded style (`>`). - If quotes are needed around a string, use double quotes. ### Example YAML @@ -240,6 +242,9 @@ cubes: ```yaml cubes: - name: users + description: | + All users in the system. + Note that this dataset contains PII data. sql_table: public.users dimensions: @@ -337,3 +342,4 @@ This style guide was inspired in part by: [wiki-cte]: https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL#Common_table_expression [wiki-backtick]: https://en.wikipedia.org/wiki/Backtick +[ref-yaml-folded-and-literal]: /product/data-modeling/dynamic/jinja#folded-and-literal-strings \ No newline at end of file diff --git a/docs/pages/product/data-modeling/recipes/xirr.mdx b/docs/pages/product/data-modeling/recipes/xirr.mdx index c65921cfab76e..b5eb2192dd0af 100644 --- a/docs/pages/product/data-modeling/recipes/xirr.mdx +++ b/docs/pages/product/data-modeling/recipes/xirr.mdx @@ -38,7 +38,7 @@ Consider the following data model: ```yaml cubes: - name: payments - sql: > + sql: | SELECT '2014-01-01'::date AS date, -10000.0 AS payment UNION ALL SELECT '2014-03-01'::date AS date, 2750.0 AS payment UNION ALL SELECT '2014-10-30'::date AS date, 4250.0 AS payment UNION ALL diff --git a/docs/pages/product/data-modeling/reference/context-variables.mdx 
b/docs/pages/product/data-modeling/reference/context-variables.mdx index d3e24e2f1becf..b99db10c53f62 100644 --- a/docs/pages/product/data-modeling/reference/context-variables.mdx +++ b/docs/pages/product/data-modeling/reference/context-variables.mdx @@ -136,7 +136,7 @@ following syntax: ```yaml cubes: - name: cube_name - sql: > + sql: | SELECT * FROM table WHERE {FILTER_PARAMS.cube_name.member_name.filter(sql_expression)} @@ -202,7 +202,7 @@ cube(`order_facts`, { ```yaml cubes: - name: order_facts - sql: > + sql: | SELECT * FROM orders WHERE {FILTER_PARAMS.order_facts.date.filter('date')} @@ -259,7 +259,7 @@ add BigQuery shard filtering, which will reduce your billing cost. ```yaml cubes: - name: events - sql: > + sql: | SELECT * FROM schema.`events*` WHERE {FILTER_PARAMS.events.date.filter( @@ -353,7 +353,7 @@ following syntax: ```yaml cubes: - name: cube_name - sql: > + sql: | SELECT * FROM table WHERE {FILTER_GROUP( @@ -408,7 +408,7 @@ where two `FILTER_PARAMS` are combined in SQL using the `OR` operator: ```yaml cubes: - name: filter_group - sql: > + sql: | SELECT * FROM ( SELECT 1 AS a, 3 AS b UNION ALL @@ -521,7 +521,7 @@ Now, if the cube is defined the following way... ```yaml cubes: - name: filter_group - sql: > + sql: | SELECT * FROM ( SELECT 1 AS a, 3 AS b UNION ALL diff --git a/docs/pages/product/data-modeling/reference/dimensions.mdx b/docs/pages/product/data-modeling/reference/dimensions.mdx index fef2b22dfae3c..d8bcc24d68f00 100644 --- a/docs/pages/product/data-modeling/reference/dimensions.mdx +++ b/docs/pages/product/data-modeling/reference/dimensions.mdx @@ -342,7 +342,7 @@ cube(`products`, { ```yaml cubes: - name: products - sql: > + sql: | SELECT 1 AS column_a, 1 AS column_b UNION ALL SELECT 2 AS column_a, 1 AS column_b UNION ALL SELECT 1 AS column_a, 2 AS column_b UNION ALL @@ -642,7 +642,7 @@ human-friendly description. 
```yaml cubes: - name: orders - sql: > + sql: | SELECT '2025-01-01T00:12:00.000Z'::TIMESTAMP AS time UNION ALL SELECT '2025-02-01T00:15:00.000Z'::TIMESTAMP AS time UNION ALL SELECT '2025-03-01T00:18:00.000Z'::TIMESTAMP AS time diff --git a/docs/pages/product/data-modeling/reference/pre-aggregations.mdx b/docs/pages/product/data-modeling/reference/pre-aggregations.mdx index d36f1433ff586..fc73501d00215 100644 --- a/docs/pages/product/data-modeling/reference/pre-aggregations.mdx +++ b/docs/pages/product/data-modeling/reference/pre-aggregations.mdx @@ -193,7 +193,7 @@ cube(`completed_orders`, { ```yaml cubes: - name: completed_orders - sql: > + sql: | SELECT * FROM orders WHERE completed = true @@ -1211,7 +1211,7 @@ cube(`orders`, { ```yaml cubes: - name: orders - sql: > + sql: | SELECT * FROM orders1 UNION ALL SELECT * FROM orders2 UNION ALL SELECT * FROM orders3 diff --git a/docs/pages/product/data-modeling/reference/segments.mdx b/docs/pages/product/data-modeling/reference/segments.mdx index 8e6e6b85cbb70..ff830d2db03ae 100644 --- a/docs/pages/product/data-modeling/reference/segments.mdx +++ b/docs/pages/product/data-modeling/reference/segments.mdx @@ -56,7 +56,7 @@ cubes: segments: - name: sf_users - sql: > + sql: | {CUBE}.location = 'San Francisco' OR {CUBE}.state = 'CA' ``` @@ -324,7 +324,7 @@ cubes: segments: - name: sf_ny_users - sql: > + sql: | {CUBE}.location = 'San Francisco' OR {CUBE}.location like '%New York%' ``` diff --git a/docs/pages/product/data-modeling/syntax.mdx b/docs/pages/product/data-modeling/syntax.mdx index 6d77fc6f2f7d3..4f81da3475fd6 100644 --- a/docs/pages/product/data-modeling/syntax.mdx +++ b/docs/pages/product/data-modeling/syntax.mdx @@ -48,7 +48,7 @@ cube(`orders`, { ```yaml cubes: - name: orders - sql: > + sql: | SELECT * FROM orders, line_items WHERE orders.id = line_items.order_id @@ -451,7 +451,7 @@ the dimension from `d`. 
```yaml cubes: - name: a - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -480,7 +480,7 @@ cubes: relationship: one_to_one - name: b - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -497,7 +497,7 @@ cubes: relationship: one_to_one - name: c - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -514,7 +514,7 @@ cubes: relationship: one_to_one - name: d - sql: > + sql: | SELECT 1 AS id UNION ALL SELECT 2 AS id UNION ALL SELECT 3 AS id @@ -781,13 +781,13 @@ cubes: sql_table: organisms - name: animals - sql: > + sql: | SELECT * FROM {organisms.sql()} WHERE kingdom = 'animals' - name: dogs - sql: > + sql: | SELECT * FROM {animals.sql()} WHERE species = 'dogs' @@ -824,7 +824,7 @@ In YAML data models, use `{reference}`: ```yaml cubes: - name: orders - sql: > + sql: | SELECT id, created_at FROM {other_cube.sql()} @@ -871,7 +871,7 @@ cubes: sql: SELECT CAST('\{"key":"value"\}'::JSON AS TEXT) AS json_column - name: csv_from_s3_in_duckdb - sql: > + sql: | SELECT * FROM read_csv( 's3://bbb/aaa.csv', diff --git a/docs/pages/product/workspace/recipes/query-history-export.mdx b/docs/pages/product/workspace/recipes/query-history-export.mdx index 4c5fe8e5316b3..05f7f14c585cf 100644 --- a/docs/pages/product/workspace/recipes/query-history-export.mdx +++ b/docs/pages/product/workspace/recipes/query-history-export.mdx @@ -74,7 +74,7 @@ With this data model, you can run queries that aggregate data by dimensions such ```yaml cubes: - name: requests - sql: > + sql: | SELECT *, api_response_duration_ms / 1000 AS api_response_duration, @@ -148,7 +148,7 @@ cubes: type: count - name: count_non_production - description: > + description: | Counts all non-production environments. 
See for details: https://cube.dev/docs/product/workspace/environments type: count diff --git a/packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts b/packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts index abc207be9cb05..e75ea39b12373 100644 --- a/packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts +++ b/packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts @@ -82,7 +82,8 @@ export class YamlCompiler { return; } - const yamlObj: any = YAML.load(file.content); + const processedContent = this.preprocessYamlSqlMultilineValues(file.content); + const yamlObj: any = YAML.load(processedContent); if (!yamlObj) { return; } @@ -345,4 +346,13 @@ export class YamlCompiler { return ast; } + + private preprocessYamlSqlMultilineValues(yamlContent: string): string { + // Convert all folded scalars (sql: >) to literal scalars (sql: |) + // to preserve SQL formatting including comments and whitespace + return yamlContent.replace( + /(\s+sql:\s*)>/g, + '$1|' + ); + } } diff --git a/packages/cubejs-schema-compiler/test/unit/js-sql-comments.test.ts b/packages/cubejs-schema-compiler/test/unit/js-sql-comments.test.ts new file mode 100644 index 0000000000000..49abab2797d5c --- /dev/null +++ b/packages/cubejs-schema-compiler/test/unit/js-sql-comments.test.ts @@ -0,0 +1,131 @@ +import { PostgresQuery } from '../../src/adapter/PostgresQuery'; +import { prepareCompiler } from './PrepareCompiler'; + +describe('JavaScript SQL Comments Preservation', () => { + it('preserves SQL comments in JS models', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareCompiler([ + { + fileName: 'test.js', + content: ` + cube('JSTestCube', { + sql: \` + SELECT + r.id as record_id, + r.created_at as record_created_at, + -- Extract target_record_id from workspace association JSON + JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as workspace_target_record_id, + -- Get actual workspace name by joining with workspace record + CASE + WHEN 
workspace_name.value IS NOT NULL + THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value') + ELSE NULL + END as workspace_name + FROM \\\`table\\\`.\\\`record\\\` r + JOIN \\\`table\\\`.\\\`object\\\` o ON r.object_id = o.id + -- Get company name + LEFT JOIN \\\`table\\\`.\\\`record_value\\\` company_name ON r.id = company_name.record_id + AND company_name.name = 'name' + WHERE r._fivetran_deleted = FALSE + AND o.singular_noun = 'Company' + \`, + + dimensions: { + record_id: { + sql: 'record_id', + type: 'string', + primaryKey: true + } + }, + + measures: { + count: { + type: 'count' + } + } + }); + ` + } + ]); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['JSTestCube.count'], + dimensions: ['JSTestCube.record_id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Verify that SQL comments are preserved on separate lines + expect(sql).toContain('-- Extract target_record_id from workspace association JSON'); + expect(sql).toContain('-- Get actual workspace name by joining with workspace record'); + expect(sql).toContain('-- Get company name'); + + // Ensure comments are on separate lines in JS models + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Get company name'); + expect(commentLine).toBeDefined(); + }); + + it('handles edge cases in JS SQL strings', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareCompiler([ + { + fileName: 'edge-cases.js', + content: ` + cube('EdgeCasesTest', { + sql: \` + SELECT + id, + -- Comment with 'quotes' and "double quotes" + name, + -- Comment with special chars: !@#$%^&*() + email, + created_at + FROM users + -- SQL string in comment: SELECT * FROM table + WHERE active = true + \`, + + dimensions: { + id: { + sql: 'id', + type: 'string', + primaryKey: true + } + }, + + measures: { + 
count: { + type: 'count' + } + } + }); + ` + } + ]); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['EdgeCasesTest.count'], + dimensions: ['EdgeCasesTest.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + const testLines = [ + '-- Comment with \'quotes\' and "double quotes"', + '-- Comment with special chars: !@#$%^&*()', + '-- SQL string in comment: SELECT * FROM table', + ]; + + // Ensure all comments are properly preserved + const lines = sql.split('\n').map(l => l.trim()); + for (const testLine of testLines) { + expect(lines.includes(testLine)).toBeTruthy(); + } + }); +}); diff --git a/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts b/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts new file mode 100644 index 0000000000000..81401bfa495ad --- /dev/null +++ b/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts @@ -0,0 +1,174 @@ +import { PostgresQuery } from '../../src/adapter/PostgresQuery'; +import { prepareYamlCompiler } from './PrepareCompiler'; + +describe('YAML SQL Formatting Preservation', () => { + it('handles sql: > (folded scalar)', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: Orders + sql: > + SELECT + r.id as record_id, + r.created_at as record_created_at, + -- Extract target_record_id from workspace association JSON + JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as workspace_target_record_id, + -- Get actual workspace name by joining with workspace record + CASE + WHEN workspace_name.value IS NOT NULL + THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value') + ELSE NULL + END as workspace_name + FROM \`table\`.\`record\` r + JOIN \`table\`.\`object\` o ON r.object_id = o.id + -- Get company name + LEFT JOIN \`table\`.\`record_value\` company_name ON r.id = company_name.record_id + AND 
company_name.name = 'name' + WHERE r._fivetran_deleted = FALSE + AND o.singular_noun = 'Company' + + dimensions: + - name: record_id + sql: record_id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['Orders.count'], + dimensions: ['Orders.record_id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Verify that SQL comments are preserved on separate lines + expect(sql).toContain('-- Extract target_record_id from workspace association JSON'); + expect(sql).toContain('-- Get actual workspace name by joining with workspace record'); + expect(sql).toContain('-- Get company name'); + + // Most importantly, ensure comments are NOT merged with the previous line + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Get company name'); + expect(commentLine).toBeDefined(); + }); + + it('handles sql: | (literal scalar)', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: TestCube + sql: | + SELECT id, name + -- Comment here + FROM table1 + WHERE active = true + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['TestCube.count'], + dimensions: ['TestCube.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should preserve comments with literal scalar + expect(sql).toContain('-- Comment here'); + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Comment here'); + expect(commentLine).toBeDefined(); + }); + + it('handles single-line SQL without multilines', async () => { + const { compiler, 
joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: TestCube + sql: "SELECT id, name FROM table1" + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['TestCube.count'], + dimensions: ['TestCube.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should work normally for single-line SQL + expect(sql).toContain('SELECT id, name FROM table1'); + }); + + it('works correctly for SQL without comments', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: SimpleOrders + sql: > + SELECT + id, + amount, + status + FROM orders + WHERE active = true + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['SimpleOrders.count'], + dimensions: ['SimpleOrders.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should still work normally for SQL without comments + expect(sql).toContain('SELECT'); + expect(sql).toContain('FROM orders'); + expect(sql).toContain('WHERE active = true'); + }); +});