Improve Create Profiler Dashboard CLI Usage#2319
Improve Create Profiler Dashboard CLI Usage #2319 — goodwillpunning wants to merge 26 commits into main from
Conversation
|
✅ 145/145 passed, 7 flaky, 4 skipped, 37m39s total Flaky tests:
Running from acceptance #4011 |
sundarshankar89
left a comment
There was a problem hiding this comment.
Let us take a step back, I think the approach needs to be composable
I would approach this as two commands
databricks labs lakebridge profiler-sync
databricks labs lakebridge deploy-profiler-dashboard
profiler-sync is pre-req for deploy-profiler-dashboard
profiler-sync creates the ingestion job and syncs the data into the profiler tables within Databricks. It is intentionally called "sync" because it creates the infrastructure if it does not already exist and triggers an incremental data load when applied.
deploy-profiler-dashboard just creates the AI/BI dashboard (one-off) so that it points against the tables or objects; it fails if the object doesn't exist and asks the user to run profiler-sync before running deploy-profiler-dashboard.
Thoughts?
| logging.info(f"Loading dashboard template from folder: {folder}") | ||
| dash_reference = f"{folder.stem}".lower() | ||
| dashboard_loader = ProfilerDashboardTemplateLoader(folder) | ||
| dashboard_json = dashboard_loader.load(source_system="synapse") |
There was a problem hiding this comment.
source system needs to be argument.
There was a problem hiding this comment.
Great catch!
| logger.warning("Profiler Dashboard Config is empty.") | ||
| return | ||
| logger.info("Installing the profiler dashboard components.") | ||
| self._upload_profiler_extract(profiler_dashboard_config) |
There was a problem hiding this comment.
need some error handling here, for catching upload errors and permission checks.
There was a problem hiding this comment.
Agreed. Added better error handling to catch those exceptions.
| logger.info("Uninstalling profiler dashboard components.") | ||
| self._remove_dashboards() | ||
| self._remove_jobs() | ||
| logging.info( |
There was a problem hiding this comment.
| logging.info( | |
| logger.info( |
| self._installation = installation | ||
| self._install_state = install_state | ||
| self._product_info = product_info | ||
| self._table_deployer = table_deployer |
There was a problem hiding this comment.
I don't see it used anywhere; maybe we can remove it.
| self._table_deployer = table_deployer |
There was a problem hiding this comment.
Good catch. Removed.
Codecov Report❌ Patch coverage is Additional details and impacted files@@ Coverage Diff @@
## main #2319 +/- ##
==========================================
+ Coverage 66.41% 67.68% +1.26%
==========================================
Files 99 99
Lines 9094 9271 +177
Branches 974 986 +12
==========================================
+ Hits 6040 6275 +235
+ Misses 2878 2816 -62
- Partials 176 180 +4 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
sundarshankar89
left a comment
There was a problem hiding this comment.
@goodwillpunning everything looks good. There is a small improvement I would like to make: make the job deployer and dashboard deployer generic so they can be used by both the profiler and reconcile jobs. We can iterate on this; I do not want to block this PR.
| | Amazon Redshift | ❌ | | ||
| | Oracle | ❌ | | ||
| | Microsoft SQL Server | ❌ | | ||
| | Snowflake | ❌ | |
There was a problem hiding this comment.
should we leave it here? It may be better to update this later, or to change it to "coming soon".
|
|
||
|
|
||
| def _ingest_table(extract_location: str, source_table_name: str, target_table_name: str) -> None: | ||
| def ingest_table(extract_location: str, source_table_name: str, target_table_name: str) -> None: |
There was a problem hiding this comment.
@goodwillpunning is there a reason we changed all the method signatures from private to public?
There was a problem hiding this comment.
To make them easy to unit test without disabling pylint (`# pylint: disable=import-private-name` is forbidden by the build check).
| if not os.path.exists(filepath): | ||
| raise FileNotFoundError(f"Could not find dashboard template matching {source_system}.") | ||
| with open(filepath, "r", encoding="utf-8") as f: | ||
| return json.load(f) |
There was a problem hiding this comment.
| if not os.path.exists(filepath): | |
| raise FileNotFoundError(f"Could not find dashboard template matching {source_system}.") | |
| with open(filepath, "r", encoding="utf-8") as f: | |
| return json.load(f) | |
| try: | |
| with open(filepath, "r", encoding="utf-8") as f: | |
| return json.load(f) | |
| except FileNotFoundError: | |
| raise FileNotFoundError(f"Could not find dashboard template matching {source_system}.") |
| """ | ||
|
|
||
| # Load the dashboard template | ||
| logging.info(f"Loading dashboard template from folder: {folder}") |
There was a problem hiding this comment.
| logging.info(f"Loading dashboard template from folder: {folder}") | |
| logger.info(f"Loading dashboard template from folder: {folder}") |
| try: | ||
| dashboard = self._ws.lakeview.create(dashboard=dashboard) | ||
| except ResourceAlreadyExists: | ||
| logging.info("Dashboard already exists! Removing dashboard from workspace location.") |
There was a problem hiding this comment.
| logging.info("Dashboard already exists! Removing dashboard from workspace location.") | |
| logger.info("Dashboard already exists! Removing dashboard from workspace location.") |
| def _configure_new_profiler_dashboard_installation(self) -> ProfilerDashboardConfig: | ||
| default_config = self._prompt_for_new_profiler_dashboard_installation() | ||
| self._save_config(default_config) | ||
| return default_config | ||
|
|
There was a problem hiding this comment.
no need of this
| def _configure_new_profiler_dashboard_installation(self) -> ProfilerDashboardConfig: | |
| default_config = self._prompt_for_new_profiler_dashboard_installation() | |
| self._save_config(default_config) | |
| return default_config |
| return [ | ||
| (job_name, int(job_id)) | ||
| for job_name, job_id in self._install_state.jobs.items() | ||
| if job_name.startswith(_PROFILER_DASHBOARD_PREFIX) and job_name != PROFILER_INGESTION_JOB_NAME | ||
| ] |
There was a problem hiding this comment.
| return [ | |
| (job_name, int(job_id)) | |
| for job_name, job_id in self._install_state.jobs.items() | |
| if job_name.startswith(_PROFILER_DASHBOARD_PREFIX) and job_name != PROFILER_INGESTION_JOB_NAME | |
| ] | |
| return [(name, job_id) for name, job_id in self._get_jobs() if name != PROFILER_INGESTION_JOB_NAME] |
| def test_upload_duckdb_to_uc_volume_invalid_volume_path( | ||
| dashboard_manager: DashboardManager, | ||
| dashboard_manager: ProfilerDashboardManager, | ||
| mocked_workspace_client: WorkspaceClient, | ||
| ): | ||
| ws = mocked_workspace_client | ||
| result = dashboard_manager.upload_duckdb_to_uc_volume( | ||
| local_file_path="file.duckdb", volume_path="invalid_path/myfile.duckdb" | ||
| config = ProfilerDashboardConfig( | ||
| source_tech="synapse", | ||
| extract_file_path="file.duckdb", | ||
| metadata_config=ProfilerDashboardMetadataConfig(catalog="lakebridge", schema="profiler", volume="invalid_path"), | ||
| ) | ||
| result = dashboard_manager.upload_duckdb_to_uc_volume(config) | ||
| assert result is False | ||
| ws.files.upload.assert_not_called() | ||
|
|
There was a problem hiding this comment.
we no longer need this test since we don't branch and test this particular path in dashboard_manager.py
Co-authored-by: SundarShankar89 <72757199+sundarshankar89@users.noreply.github.com>
Co-authored-by: SundarShankar89 <72757199+sundarshankar89@users.noreply.github.com>
|
@goodwillpunning can you resolve conflicts then this can be ready to merge |
Changes
What does this PR do?
This PR updates the deployment of the profiler summary dashboard to be more consistent with other Lakebridge components, such as the recon job and dashboards.
Relevant implementation details
Caveats/things to watch out for when reviewing:
Linked issues
N/A
Functionality
databricks labs lakebridge ...Tests