Redundant Joins with Time Spine and Multiple Fact Tables

**Describe the bug**
I've identified a pattern of redundant joins when querying across multiple fact tables using a time spine for conformance. Employing a time spine for this purpose is a fairly standard approach in analytics, ensuring consistent reporting across different datasets at a specific grain (e.g., daily). When I issue a query against the time_spine requesting measures from both fact_table_1 and fact_table_2, the generated SQL unnecessarily carries out the join to each fact table multiple times. This redundancy significantly increases query complexity and negatively impacts performance.

```sql
SELECT
  q_0.`time_spine__date_day`,
  `fact_table_1__value_sum` `fact_table_1__value_sum`,
  `fact_table_2__value_sum` `fact_table_2__value_sum`
FROM
  (
    SELECT
      `keys`.`time_spine__date_day`,
      sum(`fact_table_1_key__fact_table_1`.value) `fact_table_1__value_sum`
    FROM
      (
        SELECT
          DISTINCT DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_1_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) `time_spine__date_day`,
          `fact_table_1_key__fact_table_1`.surrogate_key `fact_table_1__surrogate_key`
        FROM
          (
            select
              date
            from
              unnest(generate_date_array('2025-04-19', '2025-04-22')) as date
          ) AS `fact_table_1_key__time_spine`
          LEFT JOIN (
            select
              date('2025-04-19') as record_date,
              10 as value,
              'a' as surrogate_key
            union all
            select
              date('2025-04-19') as record_date,
              10 as value,
              'b' as surrogate_key
          ) AS `fact_table_1_key__fact_table_1` ON DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_1_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) = DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_1_key__fact_table_1`.record_date),
              'UTC'
            ),
            DAY
          )
          LEFT JOIN (
            select
              date('2025-04-19') as record_date,
              20 as value,
              'c' as surrogate_key
            union all
            select
              date('2025-04-19') as record_date,
              20 as value,
              'd' as surrogate_key
          ) AS `fact_table_1_key__fact_table_2` ON DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_1_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) = DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_1_key__fact_table_2`.record_date),
              'UTC'
            ),
            DAY
          )
      ) AS `keys`
      LEFT JOIN (
        select
          date('2025-04-19') as record_date,
          10 as value,
          'a' as surrogate_key
        union all
        select
          date('2025-04-19') as record_date,
          10 as value,
          'b' as surrogate_key
      ) AS `fact_table_1_key__fact_table_1` ON `keys`.`fact_table_1__surrogate_key` = `fact_table_1_key__fact_table_1`.surrogate_key
    GROUP BY
      1
  ) as q_0
  INNER JOIN (
    SELECT
      `keys`.`time_spine__date_day`,
      sum(`fact_table_2_key__fact_table_2`.value) `fact_table_2__value_sum`
    FROM
      (
        SELECT
          DISTINCT DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_2_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) `time_spine__date_day`,
          `fact_table_2_key__fact_table_2`.surrogate_key `fact_table_2__surrogate_key`
        FROM
          (
            select
              date
            from
              unnest(generate_date_array('2025-04-19', '2025-04-22')) as date
          ) AS `fact_table_2_key__time_spine`
          LEFT JOIN (
            select
              date('2025-04-19') as record_date,
              10 as value,
              'a' as surrogate_key
            union all
            select
              date('2025-04-19') as record_date,
              10 as value,
              'b' as surrogate_key
          ) AS `fact_table_2_key__fact_table_1` ON DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_2_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) = DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_2_key__fact_table_1`.record_date),
              'UTC'
            ),
            DAY
          )
          LEFT JOIN (
            select
              date('2025-04-19') as record_date,
              20 as value,
              'c' as surrogate_key
            union all
            select
              date('2025-04-19') as record_date,
              20 as value,
              'd' as surrogate_key
          ) AS `fact_table_2_key__fact_table_2` ON DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_2_key__time_spine`.date),
              'UTC'
            ),
            DAY
          ) = DATETIME_TRUNC(
            DATETIME(
              timestamp(`fact_table_2_key__fact_table_2`.record_date),
              'UTC'
            ),
            DAY
          )
      ) AS `keys`
      LEFT JOIN (
        select
          date('2025-04-19') as record_date,
          20 as value,
          'c' as surrogate_key
        union all
        select
          date('2025-04-19') as record_date,
          20 as value,
          'd' as surrogate_key
      ) AS `fact_table_2_key__fact_table_2` ON `keys`.`fact_table_2__surrogate_key` = `fact_table_2_key__fact_table_2`.surrogate_key
    GROUP BY
      1
  ) as q_1 ON (
    q_0.`time_spine__date_day` = q_1.`time_spine__date_day`
    OR (
      q_0.`time_spine__date_day` IS NULL
      AND q_1.`time_spine__date_day` IS NULL
    )
  )
ORDER BY
  1 ASC
LIMIT
  10000
```

**To Reproduce**
See screenshots and cube schema provided.

**Expected behavior**
Ideally, the query should perform the join from the time_spine to each fact table only once to retrieve the necessary data for all the requested measures. This would lead to a more efficient and less complex SQL query.

The current behavior of repeating the joins makes this sort of pattern difficult to implement efficiently, leading to significant performance issues, especially when querying across multiple fact tables, even with moderately sized datasets. In one instance in my project, the generated query took ten minutes to run. Manually optimising the query to remove the redundant joins reduced the runtime to under five seconds, demonstrating the significant overhead caused by this issue.

**Screenshots**

![Image](https://github.com/user-attachments/assets/d5d20545-a42b-47f9-b285-5b4a92cb6b52)

**Minimally reproducible Cube Schema**
*My engine is BQ.

```yaml
cubes:
  - name: time_spine
    sql: select date from unnest(generate_date_array('2025-04-19', '2025-04-22')) as date
    public: true
    joins:
      - name: fact_table_1
        sql: >
          {date.day} = {fact_table_1.record_date.day}
        relationship: one_to_many

      - name: fact_table_2
        sql: >
          {date.day} = {fact_table_2.record_date.day}
        relationship: one_to_many

    dimensions:
      - name: date
        sql: "timestamp({CUBE}.date)"
        type: time
        primary_key: true


  - name: fact_table_1
    sql: >
      select date('2025-04-19') as record_date, 10 as value, 'a' as surrogate_key
      union all
      select date('2025-04-19') as record_date, 10 as value, 'b' as surrogate_key
    public: true

    dimensions:
      - name: surrogate_key
        sql: "{CUBE}.surrogate_key"
        type: string
        primary_key: true

      - name: record_date
        sql: "timestamp({CUBE}.record_date)"
        type: time
      
      - name: value
        sql: "{CUBE}.value"
        type: number

    measures:
      - name: value_sum
        sql: "{value}"
        type: sum


  - name: fact_table_2
    sql: >
      select date('2025-04-19') as record_date, 20 as value, 'c' as surrogate_key
      union all
      select date('2025-04-19') as record_date, 20 as value, 'd' as surrogate_key
    public: true

    dimensions:
      - name: surrogate_key
        sql: "{CUBE}.surrogate_key"
        type: string
        primary_key: true

      - name: record_date
        sql: "timestamp({CUBE}.record_date)"
        type: time
      
      - name: value
        sql: "{CUBE}.value"
        type: number

    measures:
      - name: value_sum
        sql: "{value}"
        type: sum
```

**Version:**
1.3.5

**Additional context**


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Redundant Joins with Time Spine and Multiple Fact Tables #9499

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Redundant Joins with Time Spine and Multiple Fact Tables #9499

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions