diff --git a/liquid_clustering_analysis_20250814_102217.json b/liquid_clustering_analysis_20250814_102217.json deleted file mode 100644 index 3bd173b..0000000 --- a/liquid_clustering_analysis_20250814_102217.json +++ /dev/null @@ -1,207 +0,0 @@ -{ - "llm_analysis": "❌ Failed to obtain Databricks token. Please set the environment variable DATABRICKS_TOKEN.", - "extracted_data": { - "filter_columns": [ - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date >= DATE '1990-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date <= DATE '2020-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - } - ], - "join_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk", - "key_type": "LEFT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "key_type": "RIGHT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - } - ], - "groupby_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "aggregate_columns": [ - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "table_info": { - "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13624", - "current_clustering_keys": [ - "cs_sold_date_sk" - ], - "table_size_gb": 1273.6760133057833, - "files_read_bytes": 1367599205712, - "files_pruned_bytes": 4689827890, - "io_read_bytes": 2858822237, - "total_scan_gb": 1278.0437558908015, - "size_classification": "large" - }, - "tpcds.tpcds_sf10000_delta_lc.date_dim": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13416", - "current_clustering_keys": [ - "d_date_sk" - ], - "table_size_gb": 0.0017275810241699219, - "files_read_bytes": 1854976, - "files_pruned_bytes": 0, - "io_read_bytes": 0, - "total_scan_gb": 0.0017275810241699219, - "size_classification": "small" - } - }, - "scan_nodes": [], - "join_nodes": [], - "filter_nodes": [], - "metadata_summary": { - "total_nodes": 30, - "total_graphs": 1, - "filter_expressions_count": 5, - "join_expressions_count": 2, - "groupby_expressions_count": 3, - "aggregate_expressions_count": 16, - "tables_identified": 2, - "scan_nodes_count": 0, - "join_nodes_count": 0, - "filter_nodes_count": 0 - } - }, - "performance_context": { - "total_time_sec": 19.261, - "read_gb": 0, - "rows_produced": 10000, - "rows_read": 14327929995, - "data_selectivity": 0.05197870591171949 - }, - "summary": { - "analysis_method": "LLM-based", - "tables_identified": 2, - "total_filter_columns": 5, - "total_join_columns": 2, - "total_groupby_columns": 3, - "total_aggregate_columns": 16, - "scan_nodes_count": 0, - "llm_provider": "databricks" - } -} \ No newline at end of file diff --git a/liquid_clustering_analysis_20250814_114548.json b/liquid_clustering_analysis_20250814_114548.json deleted file mode 100644 index 3bd173b..0000000 --- a/liquid_clustering_analysis_20250814_114548.json +++ /dev/null @@ -1,207 +0,0 @@ -{ - "llm_analysis": "❌ Failed to obtain Databricks token. Please set the environment variable DATABRICKS_TOKEN.", - "extracted_data": { - "filter_columns": [ - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date >= DATE '1990-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date <= DATE '2020-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - } - ], - "join_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk", - "key_type": "LEFT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "key_type": "RIGHT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - } - ], - "groupby_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "aggregate_columns": [ - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "table_info": { - "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13624", - "current_clustering_keys": [ - "cs_sold_date_sk" - ], - "table_size_gb": 1273.6760133057833, - "files_read_bytes": 1367599205712, - "files_pruned_bytes": 4689827890, - "io_read_bytes": 2858822237, - "total_scan_gb": 1278.0437558908015, - "size_classification": "large" - }, - "tpcds.tpcds_sf10000_delta_lc.date_dim": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13416", - "current_clustering_keys": [ - "d_date_sk" - ], - "table_size_gb": 0.0017275810241699219, - "files_read_bytes": 1854976, - "files_pruned_bytes": 0, - "io_read_bytes": 0, - "total_scan_gb": 0.0017275810241699219, - "size_classification": "small" - } - }, - "scan_nodes": [], - "join_nodes": [], - "filter_nodes": [], - "metadata_summary": { - "total_nodes": 30, - "total_graphs": 1, - "filter_expressions_count": 5, - "join_expressions_count": 2, - "groupby_expressions_count": 3, - "aggregate_expressions_count": 16, - "tables_identified": 2, - "scan_nodes_count": 0, - "join_nodes_count": 0, - "filter_nodes_count": 0 - } - }, - "performance_context": { - "total_time_sec": 19.261, - "read_gb": 0, - "rows_produced": 10000, - "rows_read": 14327929995, - "data_selectivity": 0.05197870591171949 - }, - "summary": { - "analysis_method": "LLM-based", - "tables_identified": 2, - "total_filter_columns": 5, - "total_join_columns": 2, - "total_groupby_columns": 3, - "total_aggregate_columns": 16, - "scan_nodes_count": 0, - "llm_provider": "databricks" - } -} \ No newline at end of file diff --git a/liquid_clustering_analysis_20250814_120316.json b/liquid_clustering_analysis_20250814_120316.json deleted file mode 100644 index 3bd173b..0000000 --- a/liquid_clustering_analysis_20250814_120316.json +++ /dev/null @@ -1,207 +0,0 @@ -{ - "llm_analysis": "❌ Failed to obtain Databricks token. Please set the environment variable DATABRICKS_TOKEN.", - "extracted_data": { - "filter_columns": [ - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date >= DATE '1990-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date <= DATE '2020-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - } - ], - "join_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk", - "key_type": "LEFT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "key_type": "RIGHT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - } - ], - "groupby_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "aggregate_columns": [ - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "table_info": { - "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13624", - "current_clustering_keys": [ - "cs_sold_date_sk" - ], - "table_size_gb": 1273.6760133057833, - "files_read_bytes": 1367599205712, - "files_pruned_bytes": 4689827890, - "io_read_bytes": 2858822237, - "total_scan_gb": 1278.0437558908015, - "size_classification": "large" - }, - "tpcds.tpcds_sf10000_delta_lc.date_dim": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13416", - "current_clustering_keys": [ - "d_date_sk" - ], - "table_size_gb": 0.0017275810241699219, - "files_read_bytes": 1854976, - "files_pruned_bytes": 0, - "io_read_bytes": 0, - "total_scan_gb": 0.0017275810241699219, - "size_classification": "small" - } - }, - "scan_nodes": [], - "join_nodes": [], - "filter_nodes": [], - "metadata_summary": { - "total_nodes": 30, - "total_graphs": 1, - "filter_expressions_count": 5, - "join_expressions_count": 2, - "groupby_expressions_count": 3, - "aggregate_expressions_count": 16, - "tables_identified": 2, - "scan_nodes_count": 0, - "join_nodes_count": 0, - "filter_nodes_count": 0 - } - }, - "performance_context": { - "total_time_sec": 19.261, - "read_gb": 0, - "rows_produced": 10000, - "rows_read": 14327929995, - "data_selectivity": 0.05197870591171949 - }, - "summary": { - "analysis_method": "LLM-based", - "tables_identified": 2, - "total_filter_columns": 5, - "total_join_columns": 2, - "total_groupby_columns": 3, - "total_aggregate_columns": 16, - "scan_nodes_count": 0, - "llm_provider": "databricks" - } -} \ No newline at end of file diff --git a/liquid_clustering_analysis_20250814_121052.json b/liquid_clustering_analysis_20250814_121052.json deleted file mode 100644 index 3bd173b..0000000 --- a/liquid_clustering_analysis_20250814_121052.json +++ /dev/null @@ -1,207 +0,0 @@ -{ - "llm_analysis": "❌ Failed to obtain Databricks token. Please set the environment variable DATABRICKS_TOKEN.", - "extracted_data": { - "filter_columns": [ - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date >= DATE '1990-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date <= DATE '2020-01-02')", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - }, - { - "expression": "(tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk IS NOT NULL)", - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC" - } - ], - "join_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_sold_date_sk", - "key_type": "LEFT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "key_type": "RIGHT_KEYS", - "node_name": "Left Semi Join", - "node_tag": "PHOTON_BROADCAST_HASH_JOIN_EXEC" - } - ], - "groupby_columns": [ - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_bill_customer_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "tpcds.tpcds_sf10000_delta_lc.date_dim.d_date_sk", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "aggregate_columns": [ - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_ext_sales_price)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "avg(unscaledvalue(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit))", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "min(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "max(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - }, - { - "expression": "count(tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo.cs_net_profit)", - "node_name": "Grouping Aggregate", - "node_tag": "PHOTON_GROUPING_AGG_EXEC" - } - ], - "table_info": { - "tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.catalog_sales_demo", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13624", - "current_clustering_keys": [ - "cs_sold_date_sk" - ], - "table_size_gb": 1273.6760133057833, - "files_read_bytes": 1367599205712, - "files_pruned_bytes": 4689827890, - "io_read_bytes": 2858822237, - "total_scan_gb": 1278.0437558908015, - "size_classification": "large" - }, - "tpcds.tpcds_sf10000_delta_lc.date_dim": { - "node_name": "Scan tpcds.tpcds_sf10000_delta_lc.date_dim", - "node_tag": "UNKNOWN_DATA_SOURCE_SCAN_EXEC", - "node_id": "13416", - "current_clustering_keys": [ - "d_date_sk" - ], - "table_size_gb": 0.0017275810241699219, - "files_read_bytes": 1854976, - "files_pruned_bytes": 0, - "io_read_bytes": 0, - "total_scan_gb": 0.0017275810241699219, - "size_classification": "small" - } - }, - "scan_nodes": [], - "join_nodes": [], - "filter_nodes": [], - "metadata_summary": { - "total_nodes": 30, - "total_graphs": 1, - "filter_expressions_count": 5, - "join_expressions_count": 2, - "groupby_expressions_count": 3, - "aggregate_expressions_count": 16, - "tables_identified": 2, - "scan_nodes_count": 0, - "join_nodes_count": 0, - "filter_nodes_count": 0 - } - }, - "performance_context": { - "total_time_sec": 19.261, - "read_gb": 0, - "rows_produced": 10000, - "rows_read": 14327929995, - "data_selectivity": 0.05197870591171949 - }, - "summary": { - "analysis_method": "LLM-based", - "tables_identified": 2, - "total_filter_columns": 5, - "total_join_columns": 2, - "total_groupby_columns": 3, - "total_aggregate_columns": 16, - "scan_nodes_count": 0, - "llm_provider": "databricks" - } -} \ No newline at end of file