Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions data-warehousing/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,15 @@
# Data Warehousing

## Projects

### [Databricks Metric Views](./dbrx-metric-views/)

A demo showcasing how to use Unity Catalog Metric Views in Databricks to define semantic models directly on the platform. Built on top of the [Retail Store Star Schema Dataset](https://www.kaggle.com/datasets/shrinivasv/retail-store-star-schema-dataset?select=fact_sales_denormalized.csv), it demonstrates how embedding your semantic layer in Databricks provides unified governance through Unity Catalog alongside optimal query performance — eliminating the need for external semantic modeling tools.

### [Genie Space CI/CD](./genie-cicd/)

An automated CI/CD pipeline for promoting Databricks AI/BI Genie spaces across environments. The project uses Databricks Asset Bundles (DABs) to export a Genie space configuration from a Dev workspace, version-control it in Git, and deploy it to a Prod workspace with automatic Unity Catalog catalog/schema reference replacement. It supports both creating new and updating existing Genie spaces, runs on serverless compute by default, and is ready to integrate with CI/CD platforms like GitHub Actions or Azure DevOps.

### [Genie Room Creation](./genie-room-creation/)

A Databricks notebook that enables programmatic creation of AI/BI Genie spaces using the Databricks Python SDK and interactive widgets. It provides a guided, widget-driven experience for configuring a new Genie space — including title, description, warehouse selection, table identifiers, and sample instructions — all without writing manual HTTP requests. The notebook also demonstrates advanced patterns such as listing existing spaces, customizing data sources with sample questions, and leveraging the SDK's built-in authentication and retry capabilities.
275 changes: 275 additions & 0 deletions data-warehousing/dbrx-metric-views/0_IngestData.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "318cb8e6-9c5e-4fc3-bf7f-c3222c1c47b6",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"%pip install kagglehub\n",
"# %pip (rather than !pip) installs kagglehub as a notebook-scoped library.\n",
"# Restart the Python process so the freshly installed package becomes importable.\n",
"dbutils.library.restartPython()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "9918dd73-dce5-4f93-92d8-fefdcbcb26c8",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"# Notebook parameters: the catalog and schema that receive the ingested tables.\n",
"dbutils.widgets.text(\"CATALOG_NAME\", \"pedroz_catalog\", \"Catalog Name\")\n",
"dbutils.widgets.text(\"SCHEMA_NAME\", \"metric_views_schema\", \"Schema Name\")\n",
"# Read the current widget values; later cells use them to build table names.\n",
"CATALOG_NAME = dbutils.widgets.get(\"CATALOG_NAME\")\n",
"SCHEMA_NAME = dbutils.widgets.get(\"SCHEMA_NAME\")"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "038ad156-d93b-4c82-81a2-415c23c1bb52",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# You need to define this variable in order to download the Kaggle files to your Volume.\n",
"# The path follows the catalog/schema chosen in the widgets instead of a hardcoded location.\n",
"# NOTE(review): assumes a Volume named `dimensional_model` already exists in that schema - confirm.\n",
"os.environ[\"KAGGLEHUB_CACHE\"] = f\"/Volumes/{CATALOG_NAME}/{SCHEMA_NAME}/dimensional_model/kaggle_files\""
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "9277a6c9-63b8-4280-9995-f392d718d481",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"import kagglehub\n",
"\n",
"# Download the latest version of the dataset; `path` is reused by the ingestion cell.\n",
"DATASET_HANDLE = \"shrinivasv/retail-store-star-schema-dataset\"\n",
"path = kagglehub.dataset_download(DATASET_HANDLE)\n",
"\n",
"print(\"Path to dataset files:\", path)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "80f1d37c-096a-4876-a7e4-1e782dc013f7",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"import glob\n",
"\n",
"# `path` is the dataset directory returned by kagglehub.dataset_download.\n",
"csv_folder = path\n",
"# Sort so tables are created in a deterministic order on every run.\n",
"csv_files = sorted(glob.glob(f\"{csv_folder}/*.csv\"))\n",
"\n",
"# Fail loudly instead of silently creating no tables when the folder has no CSVs.\n",
"if not csv_files:\n",
"  raise FileNotFoundError(f\"No CSV files found in {csv_folder}\")\n",
"\n",
"for csv_file in csv_files:\n",
"  # Each table is named after its CSV file, without the extension.\n",
"  table_name = os.path.splitext(os.path.basename(csv_file))[0]\n",
"  df = spark.read.csv(csv_file, header=True, inferSchema=True)\n",
"  # Clean column names in one projection: replace spaces with underscores.\n",
"  df = df.toDF(*[c.replace(\" \", \"_\") for c in df.columns])\n",
"  full_table_name = f\"{CATALOG_NAME}.{SCHEMA_NAME}.{table_name}\"\n",
"  # Drop any previous copy so re-running the notebook recreates the table.\n",
"  spark.sql(f\"DROP TABLE IF EXISTS {full_table_name}\")\n",
"  df.write.saveAsTable(full_table_name)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "e8dff81e-29b6-4bfc-988d-610d36b4012c",
"showTitle": false,
"tableResultSettingsMap": {
"0": {
"dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{\"database\":179},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1766428355279}",
"filterBlob": null,
"queryPlanFiltersBlob": null,
"tableResultIndex": 0
}
},
"title": ""
}
},
"outputs": [],
"source": [
"# List the tables in the target catalog and schema.\n",
"tables_df = spark.sql(f\"SHOW TABLES IN {CATALOG_NAME}.{SCHEMA_NAME}\")\n",
"display(tables_df)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"implicitDf": true,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "c677244c-bf81-418a-bcac-06053e52f650",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"%sql\n",
"-- Preview the first rows of dim_campaigns in the catalog/schema chosen in the widgets.\n",
"-- Widget values are bound as named parameter markers and resolved to a table name\n",
"-- with IDENTIFIER(), replacing the deprecated $param text substitution.\n",
"SELECT *\n",
"FROM IDENTIFIER(:CATALOG_NAME || '.' || :SCHEMA_NAME || '.dim_campaigns')\n",
"LIMIT 10"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"computePreferences": {
"hardware": {
"accelerator": null,
"gpuPoolId": null,
"memory": null
}
},
"dashboards": [],
"environmentMetadata": {
"base_environment": "",
"environment_version": "4"
},
"inputWidgetPreferences": null,
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 7680324554559970,
"dataframes": [
"_sqldf"
]
},
"pythonIndentUnit": 2
},
"notebookName": "0_IngestData",
"widgets": {
"CATALOG_NAME": {
"currentValue": "pedroz_catalog",
"nuid": "0f8987ce-14af-4a61-aec6-8f2bdf7e0859",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "pedroz_catalog",
"label": "Catalog Name",
"name": "CATALOG_NAME",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "pedroz_catalog",
"label": "Catalog Name",
"name": "CATALOG_NAME",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
"SCHEMA_NAME": {
"currentValue": "metric_views_schema",
"nuid": "4f108163-255b-4738-83a3-72a2e2d7dc19",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "metric_views_schema",
"label": "Schema Name",
"name": "SCHEMA_NAME",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "metric_views_schema",
"label": "Schema Name",
"name": "SCHEMA_NAME",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
}
}
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading