diff --git a/dev-requirements.txt b/dev-requirements.txt index dbc834e..5355146 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,8 +5,11 @@ # pip-compile --no-annotate dev-requirements.in # -e file:. +about-time==4.2.1 aiohttp==3.8.3 aiosignal==1.2.0 +alive-progress==3.2.0 +anyio==4.9.0 appdirs==1.4.4 astroid==2.12.12 async-timeout==4.0.2 @@ -19,24 +22,31 @@ cfgv==3.3.1 charset-normalizer==2.0.12 click==8.1.2 codespell==2.1.0 -commonmark==0.9.1 coverage[toml]==6.4.3 cython==0.29.32 +datajunction==0.0.1a102 dill==0.3.6 distlib==0.3.5 +execnet==2.1.1 filelock==3.8.0 freezegun==1.2.2 frozenlist==1.3.1 +grapheme==0.6.0 greenlet==1.1.3.post0 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 identify==2.5.3 idna==3.3 iniconfig==1.1.1 isort==5.10.1 jinja2==3.1.2 lazy-object-proxy==1.8.0 +markdown-it-py==3.0.0 markupsafe==2.1.1 marshmallow==3.17.0 mccabe==0.7.0 +mdurl==0.1.2 multidict==6.0.2 nodeenv==1.7.0 numpy==1.23.1 @@ -51,20 +61,22 @@ prison==0.2.1 prompt-toolkit==3.0.30 py==1.11.0 pyfakefs==4.6.3 -pygments==2.12.0 +pygments==2.19.1 pylint==2.15.5 pyparsing==3.0.9 pytest==7.1.2 pytest-cov==3.0.0 pytest-mock==3.8.2 +pytest-xdist==3.7.0 python-dateutil==2.8.2 python-graphql-client==0.4.3 pytz==2022.2 -pyyaml==6.0 -requests==2.27.1 +pyyaml==6.0.2 +requests==2.32.4 requests-mock==1.9.3 -rich==12.5.1 +rich==14.0.0 six==1.16.0 +sniffio==1.3.1 soupsieve==2.3.2.post1 sqlalchemy==1.4.40 sqlglot==26.23.0 @@ -72,7 +84,7 @@ tabulate==0.8.10 toml==0.10.2 tomli==2.0.1 tomlkit==0.11.6 -typing-extensions==4.3.0 +typing-extensions==4.14.0 urllib3==1.26.9 virtualenv==20.16.3 wcwidth==0.2.5 diff --git a/setup.cfg b/setup.cfg index 4cb4024..4521ff1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -100,6 +100,7 @@ testing = pre-commit pip-tools>=6.6.0 pylint==2.15.5 + datajunction [options.entry_points] # Add here console scripts like: diff --git a/src/preset_cli/api/clients/superset.py b/src/preset_cli/api/clients/superset.py index d433106..ad8e293 100644 --- a/src/preset_cli/api/clients/superset.py +++ b/src/preset_cli/api/clients/superset.py @@ -231,7 +231,6 @@ class OwnershipType(TypedDict): class SupersetClient: # pylint: disable=too-many-public-methods - """ A client for running queries against Superset. """ @@ -360,9 +359,11 @@ def get_data( # pylint: disable=too-many-locals, too-many-arguments # and order bys processed_orderbys = [ - (orderby, not order_desc) - if orderby in metric_names - else (convert_to_adhoc_metric(orderby), not order_desc) + ( + (orderby, not order_desc) + if orderby in metric_names + else (convert_to_adhoc_metric(orderby), not order_desc) + ) for orderby in (order_by or []) ] diff --git a/src/preset_cli/cli/superset/sync/dj/__init__.py b/src/preset_cli/cli/superset/sync/dj/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/preset_cli/cli/superset/sync/dj/command.py b/src/preset_cli/cli/superset/sync/dj/command.py new file mode 100644 index 0000000..5b62272 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dj/command.py @@ -0,0 +1,90 @@ +""" +A command to sync DJ cubes into a Superset instance. +""" + +from __future__ import annotations + +import logging +from uuid import UUID + +import click +from datajunction import DJClient +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.cli.superset.sync.dj.lib import sync_cube +from preset_cli.lib import split_comma + +_logger = logging.getLogger(__name__) + + +@click.command() +@click.option( + "--database-uuid", + required=True, + help="Database UUID", +) +@click.option( + "--schema", + required=True, + help="Schema where virtual dataset will be created", +) +@click.option( + "--cubes", + callback=split_comma, + help="Comma-separated list of cubes to sync", +) +@click.option( + "dj_url", + "--dj-url", + required=True, + help="DJ URL", + default="http://localhost:8000", +) +@click.option( + "dj_username", + "--dj-username", + required=True, + help="DJ username", + default="dj", +) +@click.option( + "dj_password", + "--dj-password", + required=True, + help="DJ password", + default="dj", +) +@click.option("--external-url-prefix", default="", help="Base URL for resources") +@click.pass_context +def dj( # pylint: disable=invalid-name,too-many-arguments + ctx: click.core.Context, + database_uuid: str, + schema: str, + cubes: list[str], + dj_url: str, + dj_username: str, + dj_password: str, + external_url_prefix: str = "", +) -> None: + """ + Sync DJ cubes to Superset. + """ + superset_auth = ctx.obj["AUTH"] + superset_url = URL(ctx.obj["INSTANCE"]) + superset_client = SupersetClient(superset_url, superset_auth) + + dj_client = DJClient(dj_url) + dj_client.basic_login(dj_username, dj_password) + + base_url = URL(external_url_prefix) if external_url_prefix else None + + for cube in cubes: + sync_cube( + UUID(database_uuid), + schema, + dj_client, + superset_client, + cube, + base_url, + ) diff --git a/src/preset_cli/cli/superset/sync/dj/lib.py b/src/preset_cli/cli/superset/sync/dj/lib.py new file mode 100644 index 0000000..52564be --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dj/lib.py @@ -0,0 +1,154 @@ +""" +Helper functions for DJ sync. +""" + +import json +from typing import Any, Optional +from uuid import UUID + +from datajunction import DJClient +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.api.operators import OneToMany + + +def sync_cube( # pylint: disable=too-many-arguments + database_uuid: UUID, + schema: str, + dj_client: DJClient, + superset_client: SupersetClient, + cube: str, + base_url: Optional[URL], +) -> None: + """ + Sync a DJ cube to a Superset virtual dataset. + """ + response = dj_client._session.post( # pylint: disable=protected-access + "/graphql", + json={ + "query": """ +query FindCubes($names:[String!], $tags: [String!]) { + findNodes(names: $names, tags: $tags, nodeTypes: [CUBE]) { + name + current { + description + displayName + cubeMetrics { + name + description + extractedMeasures { + derivedExpression + } + } + cubeDimensions { + name + } + } + } +} + """, + "variables": {"names": [cube]}, + }, + ) + payload = response.json() + description = payload["data"]["findNodes"][0]["current"]["description"] + columns = [ + dimension["name"] + for dimension in payload["data"]["findNodes"][0]["current"]["cubeDimensions"] + ] + metrics = [ + { + "metric_name": metric["name"], + "expression": metric["extractedMeasures"]["derivedExpression"], + "description": metric["description"], + } + for metric in payload["data"]["findNodes"][0]["current"]["cubeMetrics"] + ] + + response = dj_client._session.post( # pylint: disable=protected-access + "/graphql", + json={ + "query": """ +query MeasuresSql($metrics: [String!]!, $dimensions: [String!]!) { + measuresSql( + cube: {metrics: $metrics, dimensions: $dimensions, filters: []} + preaggregate: true + ) { + sql + } +} + """, + "variables": { + "metrics": [metric["metric_name"] for metric in metrics], + "dimensions": columns, + }, + }, + ) + payload = response.json() + sql = payload["data"]["measuresSql"][0]["sql"] + + database = get_database(superset_client, database_uuid) + dataset = get_or_create_dataset(superset_client, database, schema, cube, sql) + + superset_client.update_dataset( + dataset["id"], + override_columns=True, + metrics=[], + ) + + superset_client.update_dataset( + dataset["id"], + override_columns=False, + metrics=metrics, + description=description, + is_managed_externally=True, + external_url=base_url / "nodes" / cube if base_url else None, + extra=json.dumps( + { + "certification": { + "certified_by": "DJ", + "details": "This table is created by DJ.", + }, + }, + ), + sql=sql, + ) + + +def get_database(superset_client: SupersetClient, uuid: UUID) -> dict[str, Any]: + """ + Get database info given its UUID. + """ + databases = superset_client.get_databases(uuid=str(uuid)) + if not databases: + raise ValueError(f"Database with UUID {uuid} not found in Superset.") + + return databases[0] + + +def get_or_create_dataset( + superset_client: SupersetClient, + database: dict[str, Any], + schema: str, + cube: str, + sql: str, +) -> dict[str, Any]: + """ + Get or create a dataset in Superset. + """ + if existing := superset_client.get_datasets( + database=OneToMany(database["id"]), # type: ignore + schema=schema, + table_name=cube, + ): + dataset = existing[0] + return superset_client.get_dataset(dataset["id"]) + + return superset_client.create_dataset( + database=database["id"], + catalog=None, + schema=schema, + table_name=cube, + sql=sql, + ) diff --git a/src/preset_cli/cli/superset/sync/main.py b/src/preset_cli/cli/superset/sync/main.py index 1fee410..623b0ba 100644 --- a/src/preset_cli/cli/superset/sync/main.py +++ b/src/preset_cli/cli/superset/sync/main.py @@ -5,6 +5,7 @@ import click from preset_cli.cli.superset.sync.dbt.command import dbt_cloud, dbt_core +from preset_cli.cli.superset.sync.dj.command import dj from preset_cli.cli.superset.sync.native.command import native @@ -16,6 +17,7 @@ def sync() -> None: sync.add_command(native) +sync.add_command(dj) sync.add_command(dbt_cloud, name="dbt-cloud") sync.add_command(dbt_core, name="dbt-core") # for backwards compatibility diff --git a/tests/cli/superset/sync/dj/__init__.py b/tests/cli/superset/sync/dj/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/superset/sync/dj/command_test.py b/tests/cli/superset/sync/dj/command_test.py new file mode 100644 index 0000000..aabe77c --- /dev/null +++ b/tests/cli/superset/sync/dj/command_test.py @@ -0,0 +1,61 @@ +""" +Tests for the DJ sync command. +""" + +# pylint: disable=invalid-name + +from uuid import UUID + +from click.testing import CliRunner +from pytest_mock import MockerFixture +from yarl import URL + +from preset_cli.cli.superset.main import superset_cli + + +def test_dj_command(mocker: MockerFixture) -> None: + """ + Tests for the sync command. + """ + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.dj.command.SupersetClient", + ) + UsernamePasswordAuth = mocker.patch( + "preset_cli.cli.superset.main.UsernamePasswordAuth", + ) + DJClient = mocker.patch("preset_cli.cli.superset.sync.dj.command.DJClient") + sync_cube = mocker.patch("preset_cli.cli.superset.sync.dj.command.sync_cube") + + runner = CliRunner() + result = runner.invoke( + superset_cli, + [ + "https://superset.example.org/", + "sync", + "dj", + "--cubes", + "default.repair_orders_cube", + "--database-uuid", + "a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715", + "--schema", + "schema", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + + SupersetClient.assert_called_once_with( + URL("https://superset.example.org/"), + UsernamePasswordAuth(), + ) + DJClient.assert_called_once_with("http://localhost:8000") + DJClient().basic_login.assert_called_once_with("dj", "dj") + + sync_cube.assert_called_once_with( + UUID("a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715"), + "schema", + DJClient(), + SupersetClient(), + "default.repair_orders_cube", + None, + ) diff --git a/tests/cli/superset/sync/dj/lib_test.py b/tests/cli/superset/sync/dj/lib_test.py new file mode 100644 index 0000000..11a2195 --- /dev/null +++ b/tests/cli/superset/sync/dj/lib_test.py @@ -0,0 +1,732 @@ +""" +Tests for the DJ helpers. +""" + +# pylint: disable=line-too-long + +import json +from uuid import UUID + +import pytest +from pytest_mock import MockerFixture +from yarl import URL + +from preset_cli.cli.superset.sync.dj.lib import ( + get_database, + get_or_create_dataset, + sync_cube, +) + + +def test_sync_cube(mocker: MockerFixture) -> None: + """ + Test the `sync_cube` function. + """ + dj_client = mocker.MagicMock() + dj_client._session.post().json.side_effect = [ # pylint: disable=protected-access + { + "data": { + "findNodes": [ + { + "name": "default.repair_orders_cube", + "current": { + "description": "Repair Orders Cube", + "displayName": "Repair Orders Cube", + "cubeMetrics": [ + { + "name": "default.avg_repair_price", + "description": "Average repair price", + "extractedMeasures": { + "derivedExpression": "SUM(price_sum_252381cf) / SUM(price_count_252381cf)", + }, + }, + { + "name": "default.total_repair_cost", + "description": "Total repair cost", + "extractedMeasures": { + "derivedExpression": "sum(price_sum_252381cf)", + }, + }, + { + "name": "default.total_repair_order_discounts", + "description": "Total repair order discounts", + "extractedMeasures": { + "derivedExpression": "sum(price_discount_sum_94fc7ec3)", + }, + }, + ], + "cubeDimensions": [ + {"name": "default.dispatcher.company_name"}, + {"name": "default.hard_hat.state"}, + ], + }, + }, + ], + }, + }, + { + "data": { + "measuresSql": [ + { + "sql": """WITH default_DOT_repair_order_details AS ( + SELECT + default_DOT_repair_order_details.repair_order_id, + default_DOT_repair_order_details.repair_type_id, + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.quantity, + default_DOT_repair_order_details.discount + FROM roads.repair_order_details AS default_DOT_repair_order_details +), default_DOT_repair_order AS ( + SELECT + default_DOT_repair_orders.repair_order_id, + default_DOT_repair_orders.municipality_id, + default_DOT_repair_orders.hard_hat_id, + default_DOT_repair_orders.dispatcher_id + FROM roads.repair_orders AS default_DOT_repair_orders +), default_DOT_dispatcher AS ( + SELECT + default_DOT_dispatchers.dispatcher_id, + default_DOT_dispatchers.company_name, + default_DOT_dispatchers.phone + FROM roads.dispatchers AS default_DOT_dispatchers +), default_DOT_hard_hat AS ( + SELECT + default_DOT_hard_hats.hard_hat_id, + default_DOT_hard_hats.last_name, + default_DOT_hard_hats.first_name, + default_DOT_hard_hats.title, + default_DOT_hard_hats.birth_date, + default_DOT_hard_hats.hire_date, + default_DOT_hard_hats.address, + default_DOT_hard_hats.city, + default_DOT_hard_hats.state, + default_DOT_hard_hats.postal_code, + default_DOT_hard_hats.country, + default_DOT_hard_hats.manager, + default_DOT_hard_hats.contractor_id + FROM roads.hard_hats AS default_DOT_hard_hats +), default_DOT_repair_order_details_built AS ( + SELECT + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.discount, + default_DOT_dispatcher.company_name AS default_DOT_dispatcher_DOT_company_name, + default_DOT_hard_hat.state AS default_DOT_hard_hat_DOT_state + FROM default_DOT_repair_order_details + LEFT JOIN default_DOT_repair_order + ON default_DOT_repair_order_details.repair_order_id = default_DOT_repair_order.repair_order_id + LEFT JOIN default_DOT_dispatcher + ON default_DOT_repair_order.dispatcher_id = default_DOT_dispatcher.dispatcher_id + LEFT JOIN default_DOT_hard_hat + ON default_DOT_repair_order.hard_hat_id = default_DOT_hard_hat.hard_hat_id +) +SELECT + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state, + COUNT(price) AS price_count_252381cf, + SUM(price) AS price_sum_252381cf, + SUM(price * discount) AS price_discount_sum_94fc7ec3 +FROM default_DOT_repair_order_details_built +GROUP BY + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state""", + }, + ], + }, + }, + ] + superset_client = mocker.MagicMock() + mocker.patch( + "preset_cli.cli.superset.sync.dj.lib.get_database", + return_value={ + "allow_ctas": False, + "allow_cvas": False, + "allow_dml": False, + "allow_file_upload": False, + "allow_multi_catalog": False, + "allow_run_async": False, + "allows_cost_estimate": False, + "allows_subquery": True, + "allows_virtual_table_explore": True, + "backend": "duckdb", + "changed_by": {"first_name": "Superset", "last_name": "Admin"}, + "changed_on": "2025-06-18T21:20:37.785659", + "changed_on_delta_humanized": "an hour ago", + "created_by": {"first_name": "Superset", "last_name": "Admin"}, + "database_name": "DuckDB", + "disable_data_preview": False, + "disable_drill_to_detail": False, + "engine_information": { + "disable_ssh_tunneling": False, + "supports_dynamic_catalog": False, + "supports_file_upload": True, + "supports_oauth2": False, + }, + "explore_database_id": 2, + "expose_in_sqllab": True, + "extra": '{"allows_virtual_table_explore":true}', + "force_ctas_schema": None, + "id": 2, + "uuid": "a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715", + }, + ) + mocker.patch( + "preset_cli.cli.superset.sync.dj.lib.get_or_create_dataset", + return_value={ + "data": { + "always_filter_main_dttm": False, + "cache_timeout": None, + "catalog": None, + "column_formats": {}, + "columns": [ + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": "default_DOT_dispatcher_DOT_company_name", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 779, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "STRING", + "type_generic": 1, + "uuid": "442043ef-96aa-4860-92e4-fd3767d2f237", + "verbose_name": None, + "warning_markdown": None, + }, + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": "default_DOT_hard_hat_DOT_state", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 780, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "STRING", + "type_generic": 1, + "uuid": "95e28283-f0f0-4047-bc7e-ceba94032d62", + "verbose_name": None, + "warning_markdown": None, + }, + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": "price_count_252381cf", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 781, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "NUMBER", + "type_generic": None, + "uuid": "312d8a53-60bb-472f-b17e-a4b375fc8835", + "verbose_name": None, + "warning_markdown": None, + }, + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": "price_sum_252381cf", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 782, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "NUMBER", + "type_generic": None, + "uuid": "3cff4dcc-fc3c-4b86-bcf9-de84d6073681", + "verbose_name": None, + "warning_markdown": None, + }, + { + "advanced_data_type": None, + "certification_details": None, + "certified_by": None, + "column_name": "price_discount_sum_94fc7ec3", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 783, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "NUMBER", + "type_generic": None, + "uuid": "f6c7f69b-4005-49cf-af61-2c41f8fea173", + "verbose_name": None, + "warning_markdown": None, + }, + ], + "database": { + "allow_multi_catalog": False, + "allows_cost_estimate": False, + "allows_subquery": True, + "allows_virtual_table_explore": True, + "backend": "duckdb", + "configuration_method": "sqlalchemy_form", + "disable_data_preview": False, + "disable_drill_to_detail": False, + "engine_information": { + "disable_ssh_tunneling": False, + "supports_dynamic_catalog": False, + "supports_file_upload": True, + "supports_oauth2": False, + }, + "explore_database_id": 2, + "id": 2, + "name": "DuckDB", + "parameters": { + "access_token": "", + "database": "/app/default.duckdb", + "query": {}, + }, + "parameters_schema": { + "properties": { + "access_token": { + "default": "https://app.motherduck.com/token-request?appName=Superset&close=y", + "description": "MotherDuck token", + "nullable": True, + "type": "string", + }, + "database": { + "description": "Database name", + "type": "string", + }, + "query": { + "additionalProperties": {}, + "description": "Additional parameters", + "type": "object", + }, + }, + "type": "object", + }, + "schema_options": {}, + }, + "datasource_name": "default.repair_orders_cube", + "default_endpoint": None, + "description": None, + "edit_url": "/tablemodelview/edit/28", + "extra": None, + "fetch_values_predicate": None, + "filter_select": True, + "filter_select_enabled": True, + "folders": None, + "granularity_sqla": [], + "health_check_message": None, + "id": 28, + "is_sqllab_view": False, + "main_dttm_col": None, + "metrics": [ + { + "certification_details": None, + "certified_by": None, + "currency": None, + "d3format": None, + "description": None, + "expression": "COUNT(*)", + "id": 76, + "is_certified": False, + "metric_name": "count", + "uuid": "a8f2a5da-1986-41b7-a846-e72d9747e959", + "verbose_name": "COUNT(*)", + "warning_markdown": None, + "warning_text": None, + }, + ], + "name": "roads.default.repair_orders_cube", + "normalize_columns": False, + "offset": 0, + "order_by_choices": [ + [ + '["default_DOT_dispatcher_DOT_company_name", true]', + "default_DOT_dispatcher_DOT_company_name [asc]", + ], + [ + '["default_DOT_dispatcher_DOT_company_name", false]', + "default_DOT_dispatcher_DOT_company_name [desc]", + ], + [ + '["default_DOT_hard_hat_DOT_state", true]', + "default_DOT_hard_hat_DOT_state [asc]", + ], + [ + '["default_DOT_hard_hat_DOT_state", false]', + "default_DOT_hard_hat_DOT_state [desc]", + ], + ['["price_count_252381cf", true]', "price_count_252381cf [asc]"], + ['["price_count_252381cf", false]', "price_count_252381cf [desc]"], + [ + '["price_discount_sum_94fc7ec3", true]', + "price_discount_sum_94fc7ec3 [asc]", + ], + [ + '["price_discount_sum_94fc7ec3", false]', + "price_discount_sum_94fc7ec3 [desc]", + ], + ['["price_sum_252381cf", true]', "price_sum_252381cf [asc]"], + ['["price_sum_252381cf", false]', "price_sum_252381cf [desc]"], + ], + "owners": [ + { + "first_name": "Superset", + "id": 1, + "last_name": "Admin", + "username": "admin", + }, + ], + "params": None, + "perm": "[DuckDB].[default.repair_orders_cube](id:28)", + "schema": "roads", + "select_star": 'SELECT\n *\nFROM roads."default.repair_orders_cube"\nLIMIT 100', + "sql": """WITH default_DOT_repair_order_details AS ( + SELECT + default_DOT_repair_order_details.repair_order_id, + default_DOT_repair_order_details.repair_type_id, + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.quantity, + default_DOT_repair_order_details.discount + FROM roads.repair_order_details AS default_DOT_repair_order_details +), default_DOT_repair_order AS ( + SELECT + default_DOT_repair_orders.repair_order_id, + default_DOT_repair_orders.municipality_id, + default_DOT_repair_orders.hard_hat_id, + default_DOT_repair_orders.dispatcher_id + FROM roads.repair_orders AS default_DOT_repair_orders +), default_DOT_dispatcher AS ( + SELECT + default_DOT_dispatchers.dispatcher_id, + default_DOT_dispatchers.company_name, + default_DOT_dispatchers.phone + FROM roads.dispatchers AS default_DOT_dispatchers +), default_DOT_hard_hat AS ( + SELECT + default_DOT_hard_hats.hard_hat_id, + default_DOT_hard_hats.last_name, + default_DOT_hard_hats.first_name, + default_DOT_hard_hats.title, + default_DOT_hard_hats.birth_date, + default_DOT_hard_hats.hire_date, + default_DOT_hard_hats.address, + default_DOT_hard_hats.city, + default_DOT_hard_hats.state, + default_DOT_hard_hats.postal_code, + default_DOT_hard_hats.country, + default_DOT_hard_hats.manager, + default_DOT_hard_hats.contractor_id + FROM roads.hard_hats AS default_DOT_hard_hats +), default_DOT_repair_order_details_built AS ( + SELECT + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.discount, + default_DOT_dispatcher.company_name AS default_DOT_dispatcher_DOT_company_name, + default_DOT_hard_hat.state AS default_DOT_hard_hat_DOT_state + FROM default_DOT_repair_order_details + LEFT JOIN default_DOT_repair_order + ON default_DOT_repair_order_details.repair_order_id = default_DOT_repair_order.repair_order_id + LEFT JOIN default_DOT_dispatcher + ON default_DOT_repair_order.dispatcher_id = default_DOT_dispatcher.dispatcher_id + LEFT JOIN default_DOT_hard_hat + ON default_DOT_repair_order.hard_hat_id = default_DOT_hard_hat.hard_hat_id +) +SELECT + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state, + COUNT(price) AS price_count_252381cf, + SUM(price) AS price_sum_252381cf, + SUM(price * discount) AS price_discount_sum_94fc7ec3 +FROM default_DOT_repair_order_details_built +GROUP BY + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state""", + "table_name": "default.repair_orders_cube", + "template_params": None, + "time_grain_sqla": [ + ["PT1S", "Second"], + ["PT1M", "Minute"], + ["PT1H", "Hour"], + ["P1D", "Day"], + ["P1W", "Week"], + ["P1M", "Month"], + ["P3M", "Quarter"], + ["P1Y", "Year"], + ], + "type": "table", + "uid": "28__table", + "verbose_map": { + "__timestamp": "Time", + "count": "COUNT(*)", + "default_DOT_dispatcher_DOT_company_name": "default_DOT_dispatcher_DOT_company_name", + "default_DOT_hard_hat_DOT_state": "default_DOT_hard_hat_DOT_state", + "price_count_252381cf": "price_count_252381cf", + "price_discount_sum_94fc7ec3": "price_discount_sum_94fc7ec3", + "price_sum_252381cf": "price_sum_252381cf", + }, + }, + "id": 28, + "result": { + "always_filter_main_dttm": False, + "catalog": None, + "database": 2, + "normalize_columns": False, + "schema": "roads", + "sql": """WITH default_DOT_repair_order_details AS ( + SELECT + default_DOT_repair_order_details.repair_order_id, + default_DOT_repair_order_details.repair_type_id, + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.quantity, + default_DOT_repair_order_details.discount + FROM roads.repair_order_details AS default_DOT_repair_order_details +), default_DOT_repair_order AS ( + SELECT + default_DOT_repair_orders.repair_order_id, + default_DOT_repair_orders.municipality_id, + default_DOT_repair_orders.hard_hat_id, + default_DOT_repair_orders.dispatcher_id + FROM roads.repair_orders AS default_DOT_repair_orders +), default_DOT_dispatcher AS ( + SELECT + default_DOT_dispatchers.dispatcher_id, + default_DOT_dispatchers.company_name, + default_DOT_dispatchers.phone + FROM roads.dispatchers AS default_DOT_dispatchers +), default_DOT_hard_hat AS ( + SELECT + default_DOT_hard_hats.hard_hat_id, + default_DOT_hard_hats.last_name, + default_DOT_hard_hats.first_name, + default_DOT_hard_hats.title, + default_DOT_hard_hats.birth_date, + default_DOT_hard_hats.hire_date, + default_DOT_hard_hats.address, + default_DOT_hard_hats.city, + default_DOT_hard_hats.state, + default_DOT_hard_hats.postal_code, + default_DOT_hard_hats.country, + default_DOT_hard_hats.manager, + default_DOT_hard_hats.contractor_id + FROM roads.hard_hats AS default_DOT_hard_hats +), default_DOT_repair_order_details_built AS ( + SELECT + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.discount, + default_DOT_dispatcher.company_name AS default_DOT_dispatcher_DOT_company_name, + default_DOT_hard_hat.state AS default_DOT_hard_hat_DOT_state + FROM default_DOT_repair_order_details + LEFT JOIN default_DOT_repair_order + ON default_DOT_repair_order_details.repair_order_id = default_DOT_repair_order.repair_order_id + LEFT JOIN default_DOT_dispatcher + ON default_DOT_repair_order.dispatcher_id = default_DOT_dispatcher.dispatcher_id + LEFT JOIN default_DOT_hard_hat + ON default_DOT_repair_order.hard_hat_id = default_DOT_hard_hat.hard_hat_id +) +SELECT + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state, + COUNT(price) AS price_count_252381cf, + SUM(price) AS price_sum_252381cf, + SUM(price * discount) AS price_discount_sum_94fc7ec3 +FROM default_DOT_repair_order_details_built +GROUP BY + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state""", + "table_name": "default.repair_orders_cube", + }, + }, + ) + + sync_cube( + UUID("a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715"), + "schema", + dj_client, + superset_client, + "test_cube", + URL("https://dj.example.org/"), + ) + + superset_client.update_dataset.assert_has_calls( + [ + mocker.call(28, override_columns=True, metrics=[]), + mocker.call( + 28, + override_columns=False, + metrics=[ + { + "metric_name": "default.avg_repair_price", + "expression": "SUM(price_sum_252381cf) / SUM(price_count_252381cf)", + "description": "Average repair price", + }, + { + "metric_name": "default.total_repair_cost", + "expression": "sum(price_sum_252381cf)", + "description": "Total repair cost", + }, + { + "metric_name": "default.total_repair_order_discounts", + "expression": "sum(price_discount_sum_94fc7ec3)", + "description": "Total repair order discounts", + }, + ], + description="Repair Orders Cube", + is_managed_externally=True, + external_url=URL("https://dj.example.org/nodes/test_cube"), + extra=json.dumps( + { + "certification": { + "certified_by": "DJ", + "details": "This table is created by DJ.", + }, + }, + ), + sql="""WITH default_DOT_repair_order_details AS ( + SELECT + default_DOT_repair_order_details.repair_order_id, + default_DOT_repair_order_details.repair_type_id, + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.quantity, + default_DOT_repair_order_details.discount + FROM roads.repair_order_details AS default_DOT_repair_order_details +), default_DOT_repair_order AS ( + SELECT + default_DOT_repair_orders.repair_order_id, + default_DOT_repair_orders.municipality_id, + default_DOT_repair_orders.hard_hat_id, + default_DOT_repair_orders.dispatcher_id + FROM roads.repair_orders AS default_DOT_repair_orders +), default_DOT_dispatcher AS ( + SELECT + default_DOT_dispatchers.dispatcher_id, + default_DOT_dispatchers.company_name, + default_DOT_dispatchers.phone + FROM roads.dispatchers AS default_DOT_dispatchers +), default_DOT_hard_hat AS ( + SELECT + default_DOT_hard_hats.hard_hat_id, + default_DOT_hard_hats.last_name, + default_DOT_hard_hats.first_name, + default_DOT_hard_hats.title, + default_DOT_hard_hats.birth_date, + default_DOT_hard_hats.hire_date, + default_DOT_hard_hats.address, + default_DOT_hard_hats.city, + default_DOT_hard_hats.state, + default_DOT_hard_hats.postal_code, + default_DOT_hard_hats.country, + default_DOT_hard_hats.manager, + default_DOT_hard_hats.contractor_id + FROM roads.hard_hats AS default_DOT_hard_hats +), default_DOT_repair_order_details_built AS ( + SELECT + default_DOT_repair_order_details.price, + default_DOT_repair_order_details.discount, + default_DOT_dispatcher.company_name AS default_DOT_dispatcher_DOT_company_name, + default_DOT_hard_hat.state AS default_DOT_hard_hat_DOT_state + FROM default_DOT_repair_order_details + LEFT JOIN default_DOT_repair_order + ON default_DOT_repair_order_details.repair_order_id = default_DOT_repair_order.repair_order_id + LEFT JOIN default_DOT_dispatcher + ON default_DOT_repair_order.dispatcher_id = default_DOT_dispatcher.dispatcher_id + LEFT JOIN default_DOT_hard_hat + ON default_DOT_repair_order.hard_hat_id = default_DOT_hard_hat.hard_hat_id +) +SELECT + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state, + COUNT(price) AS price_count_252381cf, + SUM(price) AS price_sum_252381cf, + SUM(price * discount) AS price_discount_sum_94fc7ec3 +FROM default_DOT_repair_order_details_built +GROUP BY + default_DOT_repair_order_details_built.default_DOT_dispatcher_DOT_company_name, + default_DOT_repair_order_details_built.default_DOT_hard_hat_DOT_state""", + ), + ], + ) + + +def test_get_database(mocker: MockerFixture) -> None: + """ + Test the `get_database` function. + """ + superset_client = mocker.MagicMock() + superset_client.get_databases.return_value = [{"id": 1, "name": "TestDB"}] + + assert get_database( + superset_client, + UUID("a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715"), + ) == {"id": 1, "name": "TestDB"} + + +def test_get_database_not_found(mocker: MockerFixture) -> None: + """ + Test the `get_database` function. + """ + superset_client = mocker.MagicMock() + superset_client.get_databases.return_value = [] + + with pytest.raises(ValueError) as excinfo: + get_database(superset_client, UUID("a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715")) + assert ( + str(excinfo.value) + == "Database with UUID a1ad7bd5-b1a3-4d64-afb1-a84c2f4d7715 not found in Superset." + ) + + +def test_get_or_create_dataset_existing(mocker: MockerFixture) -> None: + """ + Test the `get_or_create_dataset` function when the dataset exists. + """ + superset_client = mocker.MagicMock() + superset_client.get_datasets.return_value = [{"id": 42}] + + get_or_create_dataset( + superset_client, + {"id": 1, "name": "TestDB"}, + "schema", + "test_cube", + "SELECT * FROM t", + ) + + superset_client.get_dataset.assert_called_once_with(42) + + +def test_get_or_create_dataset_new(mocker: MockerFixture) -> None: + """ + Test the `get_or_create_dataset` function for creating a new dataset. + """ + superset_client = mocker.MagicMock() + superset_client.get_datasets.return_value = [] + + get_or_create_dataset( + superset_client, + {"id": 1, "name": "TestDB"}, + "schema", + "test_cube", + "SELECT * FROM t", + ) + + superset_client.create_dataset.assert_called_once_with( + database=1, + catalog=None, + schema="schema", + table_name="test_cube", + sql="SELECT * FROM t", + )