diff --git a/.github/workflows/sdk-assets-sdkmcpsample-sdkmcpsample.yml b/.github/workflows/sdk-assets-sdkmcpsample-sdkmcpsample.yml new file mode 100644 index 00000000000..d9d00e0a43b --- /dev/null +++ b/.github/workflows/sdk-assets-sdkmcpsample-sdkmcpsample.yml @@ -0,0 +1,94 @@ +# This code is autogenerated. +# Code is generated by running custom script: python3 readme.py +# Any manual changes to this file may cause incorrect behavior. +# Any manual changes will be overwritten if the code is regenerated. + +name: sdk-assets-sdkmcpsample-sdkmcpsample +# This file is created by sdk/python/readme.py. +# Please do not edit directly. +on: + workflow_dispatch: + schedule: + - cron: "4 15/12 * * *" + pull_request: + branches: + - main + paths: + - sdk/python/assets/sdkmcpsample/** + - .github/workflows/sdk-assets-sdkmcpsample-sdkmcpsample.yml + - sdk/python/dev-requirements.txt + - infra/bootstrapping/** + - sdk/python/setup.sh + +permissions: + id-token: write +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: check out repo + uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: pip install notebook reqs + run: pip install -r sdk/python/dev-requirements.txt + - name: azure login + uses: azure/login@v1 + with: + client-id: ${{ secrets.OIDC_AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.OIDC_AZURE_TENANT_ID }} + subscription-id: ${{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }} + - name: bootstrap resources + run: | + echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; + bash bootstrap.sh + working-directory: infra/bootstrapping + continue-on-error: false + - name: setup SDK + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash setup.sh + working-directory: sdk/python + continue-on-error: true + - name: validate readme + run: | + python check-readme.py "${{ github.workspace }}/sdk/python/assets/sdkmcpsample" + working-directory: infra/bootstrapping + continue-on-error: false + - name: setup-cli + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash setup.sh + working-directory: cli + continue-on-error: true + - name: Eagerly cache access tokens for required scopes + run: | + # Workaround for azure-cli's lack of support for ID token refresh + # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617 + + # Management + az account get-access-token --scope https://management.azure.com/.default --output none + # ML + az account get-access-token --scope https://ml.azure.com/.default --output none + - name: run assets/sdkmcpsample/sdkmcpsample.ipynb + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; + bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "sdkmcpsample.ipynb"; + [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; + papermill -k python sdkmcpsample.ipynb sdkmcpsample.output.ipynb + working-directory: sdk/python/assets/sdkmcpsample + - name: upload notebook's 
working folder as an artifact + if: ${{ always() }} + uses: ./.github/actions/upload-artifact + with: + name: sdkmcpsample + path: sdk/python/assets/sdkmcpsample diff --git a/sdk/python/README.md b/sdk/python/README.md index 6f15a91faad..9e56057443a 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -43,6 +43,7 @@ Test Status is for branch - **_main_** |assets|data|[working_with_mltable](assets/data/working_with_mltable.ipynb)|Read, write and register a data asset|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-assets-data-working_with_mltable.yml)| |assets|environment|[environment](assets/environment/environment.ipynb)|Create custom environments from docker and/or conda YAML|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-assets-environment-environment.yml)| |assets|model|[model](assets/model/model.ipynb)|Create model from local files, cloud files, Runs|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-assets-model-model.yml)| +|assets|sdkmcpsample|[sdkmcpsample](assets/sdkmcpsample/sdkmcpsample.ipynb)|Advanced Azure ML SDK v2 sample with MCP integration, comprehensive analytics, and enterprise-grade ML workflows|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-assets-sdkmcpsample-sdkmcpsample.yml)| |basic-installation|basic-installation|[basic-installation](basic-installation/basic-installation.ipynb)|Install and do a basic sanity of azure-ai-ml by listing the workspaces available in the resource group.|| |data-wrangling|interactive_data_wrangling.ipynb|[interactive_data_wrangling](data-wrangling/interactive_data_wrangling.ipynb)|*no description* - _This sample is excluded from automated tests_|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-data-wrangling-interactive_data_wrangling.yml)| |endpoints|batch|[custom-output-batch](endpoints/batch/deploy-models/custom-outputs-parquet/custom-output-batch.ipynb)|*no description*|[](https://github.com/Azure/azureml-examples/actions/workflows/sdk-endpoints-batch-deploy-models-custom-outputs-parquet-custom-output-batch.yml)| diff --git a/sdk/python/assets/sdkmcpsample/README.md b/sdk/python/assets/sdkmcpsample/README.md new file mode 100644 index 00000000000..a6a7137f903 --- /dev/null +++ b/sdk/python/assets/sdkmcpsample/README.md @@ -0,0 +1,256 @@ +# Azure ML SDK MCP Sample + +This folder contains an advanced sample notebook demonstrating comprehensive usage of Azure Machine Learning SDK v2 with a focus on MCP (Model Context Protocol) integration and enterprise-grade ML workflows. + +## Overview + +The `sdkmcpsample.ipynb` notebook provides an in-depth exploration of Azure ML SDK v2, covering advanced concepts and operations for experienced machine learning practitioners and MLOps engineers. 
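+
+As a quick preview, the core connection pattern the notebook builds on looks like this (a minimal sketch - the placeholder IDs below are assumptions you replace with your own values):
+
+```python
+from azure.ai.ml import MLClient
+from azure.identity import DefaultAzureCredential
+
+# Placeholder values - substitute your own subscription, resource group, and workspace
+ml_client = MLClient(
+    credential=DefaultAzureCredential(),
+    subscription_id="<your-subscription-id>",
+    resource_group_name="<your-resource-group>",
+    workspace_name="<your-workspace-name>",
+)
+
+# Smoke test: list data assets to confirm the connection works
+for asset in ml_client.data.list():
+    print(asset.name)
+```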
+ +## What You'll Learn + +- **Advanced Workspace Connection**: Robust authentication patterns with fallback mechanisms +- **Comprehensive Asset Management**: In-depth analysis of data, model, environment, and component assets +- **Advanced Compute Management**: Compute resource optimization and analysis +- **Sophisticated Data Operations**: Multi-dataset management with rich metadata and comprehensive analysis +- **Advanced Data Exploration**: Statistical analysis, visualizations, and data quality assessment +- **Job and Experiment Analytics**: Comprehensive workflow analysis and performance metrics +- **Model and Endpoint Management**: Advanced deployment strategies and endpoint health monitoring +- **Workspace Health Monitoring**: Performance metrics, health scoring, and optimization recommendations + +## Prerequisites + +Before running this notebook, ensure you have: + +1. **Azure Subscription**: An active Azure subscription with a configured Azure ML workspace +2. **Python Environment**: Python 3.8+ with the following packages: + ```bash + pip install azure-ai-ml azure-identity pandas scikit-learn matplotlib seaborn + ``` +3. **Authentication**: Proper credentials for accessing your Azure ML workspace +4. **Permissions**: Appropriate RBAC permissions for asset management and compute operations +5. **Basic ML Knowledge**: Understanding of machine learning concepts and workflows + +## Getting Started + +1. **Clone the Repository**: + ```bash + git clone https://github.com/Azure/azureml-examples + cd azureml-examples/sdk/python/assets/sdkmcpsample + ``` + +2. **Set Up Environment**: + ```bash + # Create virtual environment + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + + # Install dependencies + pip install azure-ai-ml azure-identity pandas scikit-learn matplotlib seaborn + ``` + +3. **Open the Notebook**: + - Open `sdkmcpsample.ipynb` in Jupyter Lab, Jupyter Notebook, or VS Code + - Alternatively, use Azure ML Studio notebooks for cloud execution + +4. **Configure Workspace Details**: + Update the workspace configuration in the notebook: + ```python + subscription_id = "<your-subscription-id>" + resource_group_name = "<your-resource-group>" + workspace_name = "<your-workspace-name>" + ``` + +5. **Run the Notebook**: + Execute the cells sequentially to learn about advanced Azure ML SDK v2 capabilities + +## Notebook Structure + +### 1. Setup and Installation +- Advanced package management and environment verification +- Comprehensive import statements for ML workflows + +### 2. Advanced Workspace Connection +- Robust authentication with multiple credential fallbacks +- Connection validation and error handling + +### 3. Comprehensive Workspace Assets Analysis +- Detailed analysis of all asset types with metadata +- Asset categorization and distribution analysis + +### 4. Advanced Compute Resource Management +- Compute resource optimization and scaling analysis +- Performance metrics and cost optimization + +### 5. Advanced Data Asset Management +- Multi-dataset registration with rich metadata +- Data lineage and versioning strategies + +### 6. Advanced Data Exploration and Analysis +- Statistical analysis with comprehensive visualizations +- Data quality assessment and anomaly detection + +### 7. Advanced Job and Experiment Management +- Workflow analysis and performance tracking +- Experiment comparison and optimization insights + +### 8. 
Model and Endpoint Management
+- Advanced deployment strategies and patterns
+- Endpoint health monitoring and performance analysis
+
+### 9. Workspace Health and Performance Metrics
+- Comprehensive health scoring system
+- Performance optimization recommendations
+
+## Key Features
+
+### Advanced Analytics
+- **Statistical Analysis**: Comprehensive data profiling and quality assessment
+- **Visualization Suite**: Professional-grade charts and insights
+- **Performance Metrics**: Detailed workspace and resource utilization analysis
+
+### Enterprise Features
+- **Health Monitoring**: Automated workspace health scoring
+- **Optimization Recommendations**: Rule-based suggestions derived from workspace metrics
+- **Cost Analysis**: Resource utilization and cost optimization insights
+
+### Production Readiness
+- **Error Handling**: Robust exception management and fallback strategies
+- **Scalability**: Patterns for large-scale ML operations
+- **Best Practices**: Enterprise-grade coding standards and patterns
+
+## Sample Data
+
+The notebook works with multiple datasets for comprehensive demonstration:
+
+- **Titanic Dataset**: Classification problem for passenger survival prediction
+- **Diabetes Dataset**: Regression problem for medical outcome prediction
+- **Custom Data**: Extensible framework for your own datasets
+
+## Authentication Options
+
+The notebook supports multiple authentication methods, tried in order with automatic fallback:
+
+- **DefaultAzureCredential**: Automatic authentication in Azure environments
+- **InteractiveBrowserCredential**: Browser-based interactive authentication
+- **Service Principal**: Programmatic authentication for automated workflows
+- **Managed Identity**: Azure-managed identity for secure, passwordless authentication
+
+## Advanced Use Cases
+
+This sample is ideal for:
+
+- **MLOps Engineers**: Implementing enterprise-grade ML operations
+- **Data Scientists**: Advanced model development and experimentation
+- **Platform Teams**: Setting up and managing ML infrastructure
+- **Architects**: Designing scalable ML solutions
+- **DevOps Teams**: Integrating ML into CI/CD pipelines
+
+## Performance Optimization
+
+### Resource Management
+- Compute auto-scaling strategies
+- Data caching and optimization
+- Model serving optimization
+
+### Cost Optimization
+- Resource utilization monitoring
+- Automated cost alerts and budgeting
+- Right-sizing recommendations
+
+### Security Best Practices
+- RBAC implementation patterns
+- Network security configurations
+- Data encryption and compliance
+
+## Related Examples
+
+After completing this advanced sample, explore these related notebooks:
+
+- **[Basic SDK Sample](../sdksample/sdksample.ipynb)**: Foundational SDK concepts
+- **[MLOps Pipelines](../../../tutorials/mlops/)**: Production pipeline development
+- **[AutoML Integration](../../../tutorials/automl/)**: Automated machine learning
+- **[Distributed Training](../../../tutorials/distributed-training/)**: Large-scale model training
+- **[Model Deployment](../../endpoints/)**: Advanced deployment strategies
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Failures**:
+   ```bash
+   # Check Azure CLI login
+   az login
+   az account show
+
+   # Verify workspace access
+   az ml workspace show --name <workspace-name> --resource-group <resource-group>
+   ```
+
+2. **Package Dependencies**:
+   ```bash
+   # Update packages
+   pip install --upgrade azure-ai-ml azure-identity
+
+   # Check versions
+   pip list | grep azure
+   ```
+
+3. 
**Memory Issues with Large Datasets**: + - Use data streaming techniques + - Implement chunked processing + - Consider distributed computing options + +4. **Compute Resource Issues**: + - Verify compute quotas and limits + - Check resource availability in your region + - Review RBAC permissions for compute management + +### Performance Tuning + +1. **Data Loading Optimization**: + - Use Azure ML data streams for large datasets + - Implement data caching strategies + - Consider data preprocessing pipelines + +2. **Compute Optimization**: + - Right-size compute instances + - Use spot instances for cost savings + - Implement auto-scaling policies + +3. **Network Optimization**: + - Use VNet integration for security + - Optimize data transfer patterns + - Consider regional data placement + +### Getting Help + +- [Azure ML Documentation](https://docs.microsoft.com/azure/machine-learning/) +- [SDK v2 Migration Guide](https://docs.microsoft.com/azure/machine-learning/how-to-migrate-from-v1) +- [Azure ML Community](https://techcommunity.microsoft.com/t5/azure-ai/ct-p/Azure-AI) +- [GitHub Issues](https://github.com/Azure/azureml-examples/issues) +- [Stack Overflow](https://stackoverflow.com/questions/tagged/azure-machine-learning) + +## Contributing + +We welcome contributions to improve this advanced sample! Please see the [CONTRIBUTING.md](../../../../CONTRIBUTING.md) file for guidelines on: + +- Code standards and best practices +- Testing requirements +- Documentation standards +- Pull request process + +## Security + +For security-related concerns, please review our [SECURITY.md](../../../../SECURITY.md) file and follow responsible disclosure practices. + +## License + +This project is licensed under the MIT License - see the [LICENSE](../../../../LICENSE) file for details. + +## Changelog + +### Version 1.0 +- Initial release with comprehensive Azure ML SDK v2 coverage +- Advanced analytics and visualization capabilities +- Enterprise-grade health monitoring and optimization +- Production-ready error handling and best practices diff --git a/sdk/python/assets/sdkmcpsample/sdkmcpsample.ipynb b/sdk/python/assets/sdkmcpsample/sdkmcpsample.ipynb new file mode 100644 index 00000000000..071a1d194b9 --- /dev/null +++ b/sdk/python/assets/sdkmcpsample/sdkmcpsample.ipynb @@ -0,0 +1,763 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e4e00fe5", + "metadata": {}, + "source": [ + "# Azure ML SDK MCP Sample Notebook\n", + "\n", + "This notebook demonstrates advanced usage of the Azure Machine Learning SDK v2 with a focus on MCP (Model Context Protocol) integration and comprehensive ML workflows.\n", + "\n", + "## Overview\n", + "\n", + "This sample covers:\n", + "- Advanced workspace connection and authentication patterns\n", + "- Comprehensive asset management (data, models, environments, components)\n", + "- Compute resource management and optimization\n", + "- Advanced ML workflow operations and pipeline management\n", + "- Model deployment and endpoint management\n", + "- Best practices for production environments\n", + "\n", + "## Prerequisites\n", + "\n", + "- Azure subscription with Azure ML workspace\n", + "- Azure ML SDK v2 installed (`azure-ai-ml`)\n", + "- Proper authentication credentials\n", + "- Basic understanding of machine learning concepts" + ] + }, + { + "cell_type": "markdown", + "id": "703f828e", + "metadata": {}, + "source": [ + "## 1. 
Setup and Installation\n", + "\n", + "First, ensure you have the required packages installed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09746aff", + "metadata": {}, + "outputs": [], + "source": [ + "# Install required packages (run this if packages are not installed)\n", + "# !pip install azure-ai-ml azure-identity pandas scikit-learn matplotlib seaborn" + ] + }, + { + "cell_type": "markdown", + "id": "6b532d44", + "metadata": {}, + "source": [ + "## 2. Import Required Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b36ff21", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", + "from azure.ai.ml.entities import (\n", + " Data, Model, Environment, Component, Job, \n", + " ManagedOnlineEndpoint, ManagedOnlineDeployment,\n", + " AmlCompute, ComputeInstance\n", + ")\n", + "from azure.ai.ml.constants import AssetTypes\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from datetime import datetime, timedelta" + ] + }, + { + "cell_type": "markdown", + "id": "9f232a24", + "metadata": {}, + "source": [ + "## 3. Advanced Workspace Connection\n", + "\n", + "Replace the placeholder values with your actual Azure subscription and workspace details:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86f3d739", + "metadata": {}, + "outputs": [], + "source": [ + "# Azure ML workspace details - Update these with your values\n", + "subscription_id = \"<your-subscription-id>\"\n", + "resource_group_name = \"<your-resource-group>\"\n", + "workspace_name = \"<your-workspace-name>\"\n", + "\n", + "# Initialize credential and ML client with error handling\n", + "def get_ml_client():\n", + " \"\"\"Get authenticated ML client with fallback credentials\"\"\"\n", + " credentials_to_try = [\n", + " (\"DefaultAzureCredential\", DefaultAzureCredential()),\n", + " (\"InteractiveBrowserCredential\", InteractiveBrowserCredential())\n", + " ]\n", + " \n", + " for cred_name, credential in credentials_to_try:\n", + " try:\n", + " ml_client = MLClient(\n", + " credential=credential,\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group_name,\n", + " workspace_name=workspace_name\n", + " )\n", + " # Test the connection\n", + " _ = ml_client.workspaces.get(workspace_name)\n", + " print(f\"✅ Successfully connected using {cred_name}\")\n", + " return ml_client\n", + " except Exception as e:\n", + " print(f\"❌ {cred_name} failed: {str(e)[:100]}...\")\n", + " continue\n", + " \n", + " raise Exception(\"All authentication methods failed\")\n", + "\n", + "# Get authenticated client\n", + "ml_client = get_ml_client()\n", + "\n", + "print(f\"Connected to workspace: {ml_client.workspace_name}\")\n", + "print(f\"Resource group: {ml_client.resource_group_name}\")\n", + "print(f\"Subscription: {ml_client.subscription_id}\")" + ] + }, + { + "cell_type": "markdown", + "id": "0f5677d9", + "metadata": {}, + "source": [ + "## 4. 
Comprehensive Workspace Assets Analysis\n", + "\n", + "Let's perform a detailed analysis of all assets in your workspace:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9babf5ce", + "metadata": {}, + "outputs": [], + "source": [ + "def analyze_workspace_assets():\n", + " \"\"\"Comprehensive analysis of workspace assets\"\"\"\n", + " \n", + " # Data Assets Analysis\n", + " print(\"=== DATA ASSETS ANALYSIS ===\")\n", + " data_assets = list(ml_client.data.list())\n", + " \n", + " if data_assets:\n", + " data_df = pd.DataFrame([\n", + " {\n", + " 'name': asset.name,\n", + " 'version': asset.version,\n", + " 'type': asset.type,\n", + " 'description': asset.description or 'No description',\n", + " 'created_date': asset.creation_context.created_at if hasattr(asset, 'creation_context') else 'Unknown'\n", + " } for asset in data_assets[:10] # Show first 10\n", + " ])\n", + " print(f\"Total data assets: {len(data_assets)}\")\n", + " print(\"\\nRecent data assets:\")\n", + " print(data_df.to_string(index=False))\n", + " \n", + " # Asset type distribution\n", + " type_counts = pd.Series([asset.type for asset in data_assets]).value_counts()\n", + " print(\"\\nData asset types distribution:\")\n", + " print(type_counts)\n", + " else:\n", + " print(\"No data assets found\")\n", + " \n", + " print(\"\\n\" + \"=\"*50)\n", + " \n", + " # Model Assets Analysis\n", + " print(\"=== MODEL ASSETS ANALYSIS ===\")\n", + " model_assets = list(ml_client.models.list())\n", + " \n", + " if model_assets:\n", + " print(f\"Total model assets: {len(model_assets)}\")\n", + " for i, model in enumerate(model_assets[:5]):\n", + " print(f\"{i+1}. {model.name} (v{model.version}) - {model.description or 'No description'}\")\n", + " if len(model_assets) > 5:\n", + " print(f\"... and {len(model_assets) - 5} more\")\n", + " else:\n", + " print(\"No model assets found\")\n", + " \n", + " print(\"\\n\" + \"=\"*50)\n", + " \n", + " # Environment Assets Analysis\n", + " print(\"=== ENVIRONMENT ASSETS ANALYSIS ===\")\n", + " env_assets = list(ml_client.environments.list())\n", + " \n", + " if env_assets:\n", + " print(f\"Total environment assets: {len(env_assets)}\")\n", + " \n", + " # Categorize environments\n", + " curated_envs = [env for env in env_assets if env.name.startswith('AzureML')]\n", + " custom_envs = [env for env in env_assets if not env.name.startswith('AzureML')]\n", + " \n", + " print(f\"Curated environments: {len(curated_envs)}\")\n", + " print(f\"Custom environments: {len(custom_envs)}\")\n", + " \n", + " print(\"\\nRecent custom environments:\")\n", + " for i, env in enumerate(custom_envs[:3]):\n", + " print(f\"{i+1}. {env.name} (v{env.version})\")\n", + " else:\n", + " print(\"No environment assets found\")\n", + "\n", + "analyze_workspace_assets()" + ] + }, + { + "cell_type": "markdown", + "id": "0037db6e", + "metadata": {}, + "source": [ + "## 5. 
Advanced Compute Resource Management\n",
+    "\n",
+    "Let's explore and manage compute resources comprehensively:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bca6ad84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def analyze_compute_resources():\n",
+    "    \"\"\"Comprehensive compute resource analysis\"\"\"\n",
+    "    \n",
+    "    print(\"=== COMPUTE RESOURCES ANALYSIS ===\")\n",
+    "    compute_resources = list(ml_client.compute.list())\n",
+    "    \n",
+    "    if compute_resources:\n",
+    "        # AmlCompute clusters expose min_instances/max_instances; other compute\n",
+    "        # types (e.g. ComputeInstance) fall back to 'N/A' via getattr\n",
+    "        compute_df = pd.DataFrame([\n",
+    "            {\n",
+    "                'name': compute.name,\n",
+    "                'type': compute.type,\n",
+    "                'state': getattr(compute, 'provisioning_state', 'N/A'),\n",
+    "                'size': getattr(compute, 'size', 'N/A'),\n",
+    "                'min_nodes': getattr(compute, 'min_instances', 'N/A'),\n",
+    "                'max_nodes': getattr(compute, 'max_instances', 'N/A')\n",
+    "            } for compute in compute_resources\n",
+    "        ])\n",
+    "        \n",
+    "        print(f\"Total compute resources: {len(compute_resources)}\")\n",
+    "        print(\"\\nCompute resources details:\")\n",
+    "        print(compute_df.to_string(index=False))\n",
+    "        \n",
+    "        # Compute type distribution\n",
+    "        type_counts = compute_df['type'].value_counts()\n",
+    "        print(\"\\nCompute types distribution:\")\n",
+    "        print(type_counts)\n",
+    "        \n",
+    "        # State analysis\n",
+    "        state_counts = compute_df['state'].value_counts()\n",
+    "        print(\"\\nCompute states:\")\n",
+    "        print(state_counts)\n",
+    "        \n",
+    "    else:\n",
+    "        print(\"No compute resources found\")\n",
+    "\n",
+    "analyze_compute_resources()"
+   ]
+  },
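+  {
+   "cell_type": "markdown",
+   "id": "aa11bb22",
+   "metadata": {},
+   "source": [
+    "### Optional: Provisioning a Compute Cluster\n",
+    "\n",
+    "If the analysis above shows no compute resources, the next cell is a minimal sketch of provisioning a small CPU cluster with the v2 SDK. The cluster name and VM size are placeholder assumptions; adjust them for your subscription. The creation call is disabled by default (`CREATE_COMPUTE = False`) so this notebook stays read-only and does not incur compute costs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc33dd44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "\n",
+    "# Disabled by default: provisioning creates billable resources\n",
+    "CREATE_COMPUTE = False\n",
+    "\n",
+    "# Placeholder cluster definition - name and size are illustrative assumptions\n",
+    "cpu_cluster = AmlCompute(\n",
+    "    name=\"cpu-cluster-demo\",\n",
+    "    size=\"Standard_DS3_v2\",\n",
+    "    min_instances=0,  # scale to zero when idle to minimize cost\n",
+    "    max_instances=2,\n",
+    "    idle_time_before_scale_down=120,\n",
+    ")\n",
+    "\n",
+    "if CREATE_COMPUTE:\n",
+    "    # begin_create_or_update returns a long-running operation poller\n",
+    "    poller = ml_client.compute.begin_create_or_update(cpu_cluster)\n",
+    "    print(f\"Provisioning state: {poller.result().provisioning_state}\")\n",
+    "else:\n",
+    "    print(\"Skipping compute provisioning (set CREATE_COMPUTE = True to enable)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "78020d5b",
+   "metadata": {},
+   "source": [
+    "## 6. 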
Advanced Data Asset Management\n", + "\n", + "Create and manage data assets with comprehensive metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "272d0b16", + "metadata": {}, + "outputs": [], + "source": [ + "# Create multiple sample data assets with rich metadata\n", + "sample_datasets = [\n", + " {\n", + " \"name\": \"titanic-mcp-demo\",\n", + " \"path\": \"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/assets/data/sample_data/titanic.csv\",\n", + " \"description\": \"Titanic dataset for MCP demonstration - passenger survival data\",\n", + " \"tags\": {\"source\": \"github\", \"type\": \"demo\", \"format\": \"csv\", \"domain\": \"transportation\", \"task\": \"classification\"}\n", + " },\n", + " {\n", + " \"name\": \"diabetes-mcp-demo\",\n", + " \"path\": \"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/assets/data/sample_data/diabetes.csv\",\n", + " \"description\": \"Diabetes dataset for MCP demonstration - medical prediction data\",\n", + " \"tags\": {\"source\": \"github\", \"type\": \"demo\", \"format\": \"csv\", \"domain\": \"healthcare\", \"task\": \"regression\"}\n", + " }\n", + "]\n", + "\n", + "registered_assets = []\n", + "\n", + "for dataset_info in sample_datasets:\n", + " try:\n", + " sample_data = Data(\n", + " path=dataset_info[\"path\"],\n", + " type=AssetTypes.URI_FILE,\n", + " description=dataset_info[\"description\"],\n", + " name=dataset_info[\"name\"],\n", + " tags=dataset_info[\"tags\"]\n", + " )\n", + " \n", + " # Register the data asset\n", + " registered_data = ml_client.data.create_or_update(sample_data)\n", + " registered_assets.append(registered_data)\n", + " \n", + " print(f\"✅ Successfully registered: {registered_data.name}\")\n", + " print(f\" Version: {registered_data.version}\")\n", + " print(f\" Type: {registered_data.type}\")\n", + " print(f\" Description: {registered_data.description}\")\n", + " print(f\" Tags: {registered_data.tags}\")\n", + " print()\n", + " \n", + " except Exception as e:\n", + " print(f\"❌ Failed to register {dataset_info['name']}: {e}\")\n", + "\n", + "print(f\"Successfully registered {len(registered_assets)} data assets\")" + ] + }, + { + "cell_type": "markdown", + "id": "5cf6d492", + "metadata": {}, + "source": [ + "## 7. 
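Advanced Data Exploration and Analysis\n",
+    "\n",
+    "Perform comprehensive data analysis with visualizations:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ee55ff66",
+   "metadata": {},
+   "source": [
+    "Before the full analysis below, here is a minimal sketch of retrieving one of the data assets registered in the previous section and loading it with pandas. It assumes the `titanic-mcp-demo` registration above succeeded; the `label=\"latest\"` lookup resolves the newest version of the asset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab12cd34",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal sketch: resolve a registered data asset and preview it with pandas\n",
+    "try:\n",
+    "    titanic_asset = ml_client.data.get(name=\"titanic-mcp-demo\", label=\"latest\")\n",
+    "    print(f\"Resolved {titanic_asset.name} v{titanic_asset.version} -> {titanic_asset.path}\")\n",
+    "\n",
+    "    # For a URI_FILE asset that points at a public CSV, the path is directly readable\n",
+    "    preview_df = pd.read_csv(titanic_asset.path)\n",
+    "    print(preview_df.head())\n",
+    "except Exception as e:\n",
+    "    print(f\"Could not load the registered asset (was registration skipped?): {e}\")"
+   ]
+  },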
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef40376b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def comprehensive_data_analysis(dataset_url, dataset_name):\n",
+    "    \"\"\"Perform comprehensive data analysis with visualizations\"\"\"\n",
+    "    \n",
+    "    try:\n",
+    "        # Read the dataset\n",
+    "        df = pd.read_csv(dataset_url)\n",
+    "        \n",
+    "        print(f\"=== {dataset_name.upper()} DATASET ANALYSIS ===\")\n",
+    "        print(f\"Dataset shape: {df.shape}\")\n",
+    "        print(f\"Columns: {list(df.columns)}\")\n",
+    "        \n",
+    "        # Basic info (df.info() prints directly; wrapping it in print() adds a spurious 'None')\n",
+    "        print(\"\\n=== DATASET INFO ===\")\n",
+    "        df.info()\n",
+    "        \n",
+    "        # First few rows\n",
+    "        print(\"\\n=== FIRST 5 ROWS ===\")\n",
+    "        print(df.head())\n",
+    "        \n",
+    "        # Statistical summary\n",
+    "        print(\"\\n=== STATISTICAL SUMMARY ===\")\n",
+    "        print(df.describe())\n",
+    "        \n",
+    "        # Missing values analysis\n",
+    "        print(\"\\n=== MISSING VALUES ANALYSIS ===\")\n",
+    "        missing_data = df.isnull().sum()\n",
+    "        missing_percent = (missing_data / len(df)) * 100\n",
+    "        missing_df = pd.DataFrame({\n",
+    "            'Missing Count': missing_data,\n",
+    "            'Missing Percentage': missing_percent\n",
+    "        })\n",
+    "        print(missing_df[missing_df['Missing Count'] > 0])\n",
+    "        \n",
+    "        # Data quality insights\n",
+    "        print(\"\\n=== DATA QUALITY INSIGHTS ===\")\n",
+    "        print(f\"Total missing values: {df.isnull().sum().sum()}\")\n",
+    "        print(f\"Duplicate rows: {df.duplicated().sum()}\")\n",
+    "        print(f\"Unique rows: {df.drop_duplicates().shape[0]}\")\n",
+    "        \n",
+    "        # Visualization setup\n",
+    "        plt.style.use('seaborn-v0_8')\n",
+    "        fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
+    "        fig.suptitle(f'{dataset_name} Dataset Analysis', fontsize=16)\n",
+    "        \n",
+    "        # Plot 1: Missing values heatmap\n",
+    "        if df.isnull().sum().sum() > 0:\n",
+    "            sns.heatmap(df.isnull(), ax=axes[0, 0], cbar=True, yticklabels=False)\n",
+    "            axes[0, 0].set_title('Missing Values Heatmap')\n",
+    "        else:\n",
+    "            axes[0, 0].text(0.5, 0.5, 'No Missing Values', ha='center', va='center', transform=axes[0, 0].transAxes)\n",
+    "            axes[0, 0].set_title('Missing Values Status')\n",
+    "        \n",
+    "        # Plot 2: Data types distribution\n",
+    "        dtype_counts = df.dtypes.value_counts()\n",
+    "        axes[0, 1].pie(dtype_counts.values, labels=dtype_counts.index, autopct='%1.1f%%')\n",
+    "        axes[0, 1].set_title('Data Types Distribution')\n",
+    "        \n",
+    "        # Plot 3: Correlation matrix for numeric columns\n",
+    "        numeric_cols = df.select_dtypes(include=['number']).columns\n",
+    "        if len(numeric_cols) > 1:\n",
+    "            corr_matrix = df[numeric_cols].corr()\n",
+    "            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=axes[1, 0])\n",
+    "            axes[1, 0].set_title('Correlation Matrix')\n",
+    "        else:\n",
+    "            axes[1, 0].text(0.5, 0.5, 'Insufficient numeric columns', ha='center', va='center', transform=axes[1, 0].transAxes)\n",
+    "            axes[1, 0].set_title('Correlation Analysis')\n",
+    "        \n",
+    "        # Plot 4: Sample distribution of first numeric column\n",
+    "        if len(numeric_cols) > 0:\n",
+    "            first_numeric = numeric_cols[0]\n",
+    "            df[first_numeric].hist(bins=20, ax=axes[1, 1], alpha=0.7)\n",
+    "            axes[1, 1].set_title(f'Distribution of {first_numeric}')\n",
+    "            axes[1, 1].set_xlabel(first_numeric)\n",
+    "            axes[1, 1].set_ylabel('Frequency')\n",
+    "        else:\n",
+    "            axes[1, 1].text(0.5, 0.5, 'No numeric columns', ha='center', va='center', transform=axes[1, 1].transAxes)\n",
+    "            axes[1, 
1].set_title('Distribution Analysis')\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " return df\n", + " \n", + " except Exception as e:\n", + " print(f\"❌ Failed to analyze {dataset_name}: {e}\")\n", + " return None\n", + "\n", + "# Analyze Titanic dataset\n", + "titanic_df = comprehensive_data_analysis(\n", + " \"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/assets/data/sample_data/titanic.csv\",\n", + " \"Titanic\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5a951b69", + "metadata": {}, + "source": [ + "## 8. Advanced Job and Experiment Management\n", + "\n", + "Comprehensive analysis of jobs and experiments:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e916dd9e", + "metadata": {}, + "outputs": [], + "source": [ + "def analyze_jobs_and_experiments():\n", + " \"\"\"Comprehensive job and experiment analysis\"\"\"\n", + " \n", + " print(\"=== JOBS AND EXPERIMENTS ANALYSIS ===\")\n", + " \n", + " try:\n", + " # Get recent jobs\n", + " jobs = list(ml_client.jobs.list(max_results=20))\n", + " \n", + " if jobs:\n", + " # Create jobs dataframe\n", + " jobs_data = []\n", + " for job in jobs:\n", + " jobs_data.append({\n", + " 'name': job.name,\n", + " 'display_name': getattr(job, 'display_name', 'N/A'),\n", + " 'type': job.type,\n", + " 'status': job.status,\n", + " 'experiment_name': getattr(job, 'experiment_name', 'N/A'),\n", + " 'created_date': job.creation_context.created_at if hasattr(job, 'creation_context') else 'Unknown'\n", + " })\n", + " \n", + " jobs_df = pd.DataFrame(jobs_data)\n", + " \n", + " print(f\"Total recent jobs: {len(jobs)}\")\n", + " print(\"\\nRecent jobs overview:\")\n", + " print(jobs_df.to_string(index=False))\n", + " \n", + " # Job status analysis\n", + " print(\"\\n=== JOB STATUS DISTRIBUTION ===\")\n", + " status_counts = jobs_df['status'].value_counts()\n", + " print(status_counts)\n", + " \n", + " # Job type analysis\n", + " print(\"\\n=== JOB TYPE DISTRIBUTION ===\")\n", + " type_counts = jobs_df['type'].value_counts()\n", + " print(type_counts)\n", + " \n", + " # Experiment analysis\n", + " print(\"\\n=== EXPERIMENT ANALYSIS ===\")\n", + " experiment_counts = jobs_df['experiment_name'].value_counts()\n", + " print(f\"Number of unique experiments: {len(experiment_counts)}\")\n", + " print(\"Top experiments by job count:\")\n", + " print(experiment_counts.head())\n", + " \n", + " # Time-based analysis\n", + " if 'created_date' in jobs_df.columns:\n", + " print(\"\\n=== TEMPORAL ANALYSIS ===\")\n", + " # Convert dates and analyze patterns\n", + " valid_dates = jobs_df[jobs_df['created_date'] != 'Unknown']['created_date']\n", + " if len(valid_dates) > 0:\n", + " print(f\"Jobs with valid timestamps: {len(valid_dates)}\")\n", + " print(f\"Date range: {valid_dates.min()} to {valid_dates.max()}\")\n", + " \n", + " else:\n", + " print(\"No jobs found in the workspace\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Failed to analyze jobs: {e}\")\n", + "\n", + "analyze_jobs_and_experiments()" + ] + }, + { + "cell_type": "markdown", + "id": "cc0d08ac", + "metadata": {}, + "source": [ + "## 9. 
Model and Endpoint Management\n", + "\n", + "Advanced model deployment and endpoint management:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5cbd33c", + "metadata": {}, + "outputs": [], + "source": [ + "def analyze_endpoints():\n", + " \"\"\"Analyze existing endpoints in the workspace\"\"\"\n", + " \n", + " print(\"=== ENDPOINT ANALYSIS ===\")\n", + " \n", + " try:\n", + " # Online endpoints\n", + " online_endpoints = list(ml_client.online_endpoints.list())\n", + " \n", + " if online_endpoints:\n", + " print(f\"Total online endpoints: {len(online_endpoints)}\")\n", + " \n", + " for i, endpoint in enumerate(online_endpoints[:5]):\n", + " print(f\"\\n{i+1}. Endpoint: {endpoint.name}\")\n", + " print(f\" Location: {getattr(endpoint, 'location', 'N/A')}\")\n", + " print(f\" Provisioning State: {getattr(endpoint, 'provisioning_state', 'N/A')}\")\n", + " print(f\" Scoring URI: {getattr(endpoint, 'scoring_uri', 'N/A')}\")\n", + " \n", + " # Get deployments for this endpoint\n", + " try:\n", + " deployments = list(ml_client.online_deployments.list(endpoint_name=endpoint.name))\n", + " print(f\" Deployments: {len(deployments)}\")\n", + " for j, deployment in enumerate(deployments):\n", + " print(f\" {j+1}. {deployment.name} - {getattr(deployment, 'provisioning_state', 'N/A')}\")\n", + " except Exception as e:\n", + " print(f\" Could not retrieve deployments: {e}\")\n", + " \n", + " if len(online_endpoints) > 5:\n", + " print(f\"\\n... and {len(online_endpoints) - 5} more endpoints\")\n", + " else:\n", + " print(\"No online endpoints found\")\n", + " \n", + " # Batch endpoints\n", + " try:\n", + " batch_endpoints = list(ml_client.batch_endpoints.list())\n", + " print(f\"\\nTotal batch endpoints: {len(batch_endpoints)}\")\n", + " \n", + " for i, endpoint in enumerate(batch_endpoints[:3]):\n", + " print(f\"{i+1}. Batch Endpoint: {endpoint.name}\")\n", + " print(f\" Provisioning State: {getattr(endpoint, 'provisioning_state', 'N/A')}\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Could not retrieve batch endpoints: {e}\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Failed to analyze endpoints: {e}\")\n", + "\n", + "analyze_endpoints()" + ] + }, + { + "cell_type": "markdown", + "id": "07077c82", + "metadata": {}, + "source": [ + "## 10. 
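Workspace Health and Performance Metrics\n",
+    "\n",
+    "Comprehensive workspace health analysis:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba98dc76",
+   "metadata": {},
+   "source": [
+    "The endpoint analysis above is read-only. For completeness, the next cell sketches how a managed online endpoint could be created with the v2 SDK. The endpoint name is a placeholder assumption (it must be unique within the region), and the creation call is disabled by default (`CREATE_ENDPOINT = False`) because deployment provisions billable infrastructure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe54dc32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import ManagedOnlineEndpoint\n",
+    "\n",
+    "# Disabled by default: creating endpoints provisions billable infrastructure\n",
+    "CREATE_ENDPOINT = False\n",
+    "\n",
+    "# Placeholder definition - the endpoint name must be unique within the Azure region\n",
+    "demo_endpoint = ManagedOnlineEndpoint(\n",
+    "    name=\"sdkmcpsample-demo-endpoint\",\n",
+    "    description=\"Demo endpoint created from the sdkmcpsample notebook\",\n",
+    "    auth_mode=\"key\",\n",
+    ")\n",
+    "\n",
+    "if CREATE_ENDPOINT:\n",
+    "    # Long-running operation; a registered Model plus a ManagedOnlineDeployment\n",
+    "    # would be attached to the endpoint in a follow-up step\n",
+    "    poller = ml_client.online_endpoints.begin_create_or_update(demo_endpoint)\n",
+    "    print(f\"Provisioning state: {poller.result().provisioning_state}\")\n",
+    "else:\n",
+    "    print(\"Skipping endpoint creation (set CREATE_ENDPOINT = True to enable)\")"
+   ]
+  },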
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6bd4fc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def workspace_health_check():\n",
+    "    \"\"\"Comprehensive workspace health and performance analysis\"\"\"\n",
+    "    \n",
+    "    print(\"=== WORKSPACE HEALTH CHECK ===\")\n",
+    "    \n",
+    "    health_metrics = {\n",
+    "        'data_assets': 0,\n",
+    "        'model_assets': 0,\n",
+    "        'environment_assets': 0,\n",
+    "        'compute_resources': 0,\n",
+    "        'recent_jobs': 0,\n",
+    "        'online_endpoints': 0,\n",
+    "        'failed_jobs': 0,\n",
+    "        'running_jobs': 0\n",
+    "    }\n",
+    "    \n",
+    "    try:\n",
+    "        # Count assets\n",
+    "        health_metrics['data_assets'] = len(list(ml_client.data.list()))\n",
+    "        health_metrics['model_assets'] = len(list(ml_client.models.list()))\n",
+    "        health_metrics['environment_assets'] = len(list(ml_client.environments.list()))\n",
+    "        health_metrics['compute_resources'] = len(list(ml_client.compute.list()))\n",
+    "        \n",
+    "        # Analyze recent jobs\n",
+    "        recent_jobs = list(ml_client.jobs.list(max_results=50))\n",
+    "        health_metrics['recent_jobs'] = len(recent_jobs)\n",
+    "        \n",
+    "        if recent_jobs:\n",
+    "            health_metrics['failed_jobs'] = sum(1 for job in recent_jobs if job.status == 'Failed')\n",
+    "            health_metrics['running_jobs'] = sum(1 for job in recent_jobs if job.status in ['Running', 'Queued'])\n",
+    "        \n",
+    "        # Count endpoints\n",
+    "        try:\n",
+    "            health_metrics['online_endpoints'] = len(list(ml_client.online_endpoints.list()))\n",
+    "        except Exception:\n",
+    "            health_metrics['online_endpoints'] = 0\n",
+    "        \n",
+    "        # Generate health report\n",
+    "        print(\"\\n=== WORKSPACE METRICS ===\")\n",
+    "        for metric, value in health_metrics.items():\n",
+    "            print(f\"{metric.replace('_', ' ').title()}: {value}\")\n",
+    "        \n",
+    "        # Health score calculation\n",
+    "        health_score = 0\n",
+    "        max_score = 100\n",
+    "        \n",
+    "        # Asset diversity (40 points)\n",
+    "        if health_metrics['data_assets'] > 0: health_score += 10\n",
+    "        if health_metrics['model_assets'] > 0: health_score += 10\n",
+    "        if health_metrics['environment_assets'] > 5: health_score += 10 # More than curated\n",
+    "        if health_metrics['compute_resources'] > 0: health_score += 10\n",
+    "        \n",
+    "        # Activity (30 points)\n",
+    "        if health_metrics['recent_jobs'] > 0: health_score += 15\n",
+    "        if health_metrics['recent_jobs'] > 10: health_score += 15\n",
+    "        \n",
+    "        # Deployment (20 points)\n",
+    "        if health_metrics['online_endpoints'] > 0: health_score += 20\n",
+    "        \n",
+    "        # Reliability (10 points)\n",
+    "        if health_metrics['recent_jobs'] > 0:\n",
+    "            failure_rate = health_metrics['failed_jobs'] / health_metrics['recent_jobs']\n",
+    "            if failure_rate < 0.1: health_score += 10\n",
+    "            elif failure_rate < 0.3: health_score += 5\n",
+    "        \n",
+    "        print(f\"\\n=== WORKSPACE HEALTH SCORE: {health_score}/{max_score} ===\")\n",
+    "        \n",
+    "        if health_score >= 80:\n",
+    "            print(\"🟢 Excellent - Workspace is highly active and well-utilized\")\n",
+    "        elif health_score >= 60:\n",
+    "            print(\"🟡 Good - Workspace is active with room for improvement\")\n",
+    "        elif health_score >= 40:\n",
+    "            print(\"🟠 Fair - Consider increasing workspace utilization\")\n",
+    "        else:\n",
+    "            print(\"🔴 Needs Attention - Low workspace activity detected\")\n",
+    "        \n",
+    "        # Recommendations\n",
+    "        print(\"\\n=== RECOMMENDATIONS ===\")\n",
+    "        if health_metrics['data_assets'] == 0:\n",
+    "            print(\"• Consider registering data assets for better data management\")\n",
+    "        
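# The checks below are simple count-based heuristics, not ML-driven recommendations\n",
+    "        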
if health_metrics['model_assets'] == 0:\n", + " print(\"• Register trained models for version control and deployment\")\n", + " if health_metrics['compute_resources'] == 0:\n", + " print(\"• Set up compute resources for training and inference\")\n", + " if health_metrics['online_endpoints'] == 0:\n", + " print(\"• Consider deploying models to online endpoints for real-time inference\")\n", + " if health_metrics['failed_jobs'] > health_metrics['recent_jobs'] * 0.3:\n", + " print(\"• High job failure rate detected - review job configurations\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Health check failed: {e}\")\n", + "\n", + "workspace_health_check()" + ] + }, + { + "cell_type": "markdown", + "id": "2df2b2aa", + "metadata": {}, + "source": [ + "## 11. Summary and Advanced Next Steps\n", + "\n", + "This comprehensive notebook demonstrated:\n", + "\n", + "✅ **Advanced Workspace Connection**: Robust authentication with fallback mechanisms \n", + "✅ **Comprehensive Asset Analysis**: Detailed exploration of all asset types with metadata \n", + "✅ **Advanced Compute Management**: In-depth compute resource analysis and optimization \n", + "✅ **Sophisticated Data Operations**: Multi-dataset management with rich metadata and analysis \n", + "✅ **Advanced Data Exploration**: Statistical analysis, visualizations, and data quality assessment \n", + "✅ **Job and Experiment Analytics**: Comprehensive workflow analysis and performance metrics \n", + "✅ **Endpoint Management**: Model deployment and endpoint health monitoring \n", + "✅ **Workspace Health Monitoring**: Performance metrics and optimization recommendations \n", + "\n", + "### Advanced Next Steps\n", + "\n", + "To further advance your Azure ML expertise, explore these advanced topics:\n", + "\n", + "- **[MLOps Pipelines](../../../tutorials/mlops)**: Enterprise-grade ML operations and automation\n", + "- **[AutoML Integration](../../../tutorials/automl)**: Automated machine learning workflows\n", + "- **[Responsible AI](../../../tutorials/responsible-ai)**: Fairness, explainability, and model governance\n", + "- **[Distributed Training](../../../tutorials/distributed-training)**: Large-scale model training\n", + "- **[Edge Deployment](../../../tutorials/edge-deployment)**: IoT and edge inference scenarios\n", + "- **[Feature Stores](../../../tutorials/feature-store)**: Advanced feature engineering and management\n", + "\n", + "### Advanced Resources\n", + "\n", + "- [Azure ML Architecture Patterns](https://docs.microsoft.com/azure/machine-learning/concept-ml-pipelines)\n", + "- [Production MLOps Guide](https://docs.microsoft.com/azure/machine-learning/concept-model-management-and-deployment)\n", + "- [Azure ML Best Practices](https://docs.microsoft.com/azure/machine-learning/concept-enterprise-security)\n", + "- [Advanced SDK Reference](https://docs.microsoft.com/python/api/azure-ai-ml/)\n", + "\n", + "### Performance Optimization Tips\n", + "\n", + "1. **Compute Optimization**: Use appropriate compute sizes and auto-scaling\n", + "2. **Data Optimization**: Implement data versioning and caching strategies\n", + "3. **Model Optimization**: Use model quantization and optimization techniques\n", + "4. **Cost Management**: Monitor and optimize resource usage\n", + "5. 
**Security**: Implement proper RBAC and network security"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}